def test_model_ridge_classifier_int(self):
    model, X = fit_classification_model(linear_model.RidgeClassifier(),
                                        5,
                                        is_int=True)
    model_onnx = convert_sklearn(
        model,
        "multi-class ridge classifier",
        [("input", Int64TensorType([None, X.shape[1]]))],
    )
    self.assertIsNotNone(model_onnx)
    dump_data_and_model(
        X,
        model,
        model_onnx,
        basename="SklearnRidgeClassifierInt",
        allow_failure="StrictVersion(onnxruntime.__version__)"
        " <= StrictVersion('0.2.1')",
    )
def lines(x_train, x_test, y_train, y_test):

    res = []
    m = linear_model.RidgeClassifier()
    m.fit(x_train, y_train)
    predictions = m.predict(x_test)
    acc = accuracy_score(y_test, predictions)

    res.append((acc, "RidgeClassifier"))

    m = linear_model.SGDClassifier()
    m.fit(x_train, y_train)
    predictions = m.predict(x_test)
    acc = accuracy_score(y_test, predictions)

    res.append((acc, "SGDClassifier"))

    return res
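A quick way to exercise lines (a sketch, not from the original source; the iris loader and train/test split are assumptions):

# Hypothetical usage sketch: compare the two classifiers on iris data.
from sklearn import datasets, linear_model
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

X, y = datasets.load_iris(return_X_y=True)
x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=0)
for acc, name in lines(x_train, x_test, y_train, y_test):
    print(f"{name}: {acc:.3f}")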
Example #3
def get_model(self):
    if self.problem_type == "regression":
        if self.regularization == "l1":
            model = lm.LassoLars(eps=1e-8)
        elif self.regularization == "l2":
            model = lm.Ridge(normalize=True, random_state=self.random_state)
        else:
            raise ValueError(f"Unknown regularization {self.regularization}")
    elif self.problem_type == "classification":
        if self.regularization == "l1":
            model = lm.LogisticRegression(penalty="l1", solver="saga", class_weight="balanced",
                                          random_state=self.random_state)
        elif self.regularization == "l2":
            model = lm.RidgeClassifier(normalize=True, random_state=self.random_state)
        else:
            raise ValueError(f"Unknown regularization {self.regularization}")
    else:
        raise ValueError("Unknown problem_type %r - not performing noise filtering." % self.problem_type)
    return model
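A hypothetical harness for get_model (not from the source): a minimal stand-in object carrying the attributes the method reads. Note that the normalize= arguments above target scikit-learn older than 1.2, where they were still accepted.

# Hypothetical usage sketch; the _Config class is an illustration only.
import sklearn.linear_model as lm

class _Config:
    problem_type = "classification"
    regularization = "l2"
    random_state = 0

ridge_clf = get_model(_Config())  # -> lm.RidgeClassifier(normalize=True, random_state=0)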
Example #4
def build_pipeline(hp):
    n_components = hp.Choice("n_components", [2, 5, 10], default=5)
    pca = decomposition.PCA(n_components=n_components)

    model_type = hp.Choice("model_type", ["random_forest", "ridge"])
    if model_type == "random_forest":
        with hp.conditional_scope("model_type", "random_forest"):
            model = ensemble.RandomForestClassifier(
                n_estimators=hp.Int("n_estimators", 10, 50, step=10),
                max_depth=hp.Int("max_depth", 3, 10),
            )
    elif model_type == "ridge":
        with hp.conditional_scope("model_type", "ridge"):
            model = linear_model.RidgeClassifier(
                alpha=hp.Float("alpha", 1e-3, 1, sampling="log"))
    else:
        raise ValueError("Unrecognized model_type")

    skpipeline = pipeline.Pipeline([("pca", pca), ("clf", model)])
    return skpipeline
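build_pipeline follows the KerasTuner scikit-learn tuning pattern, so a driver might look like the sketch below (assumed, not from the source; it relies on keras_tuner's SklearnTuner and oracle classes):

# Hypothetical driver for build_pipeline using KerasTuner's sklearn tuner.
import keras_tuner as kt
from sklearn import datasets, metrics

tuner = kt.SklearnTuner(
    oracle=kt.oracles.BayesianOptimizationOracle(
        objective=kt.Objective("score", "max"), max_trials=10),
    hypermodel=build_pipeline,
    scoring=metrics.make_scorer(metrics.accuracy_score),
    directory="tuning_dir",
    project_name="ridge_vs_rf")

X, y = datasets.load_iris(return_X_y=True)
tuner.search(X, y)
best_pipeline = tuner.get_best_models(num_models=1)[0]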
Example #6
def r_classifier(X,
                 y,
                 alpha=1.0,
                 fit_intercept=True,
                 normalize=True,
                 solver='auto',
                 max_iter=1000,
                 tol=0.0001):
    # Pass the arguments through instead of hard-coding tol and solver.
    reg = linear_model.RidgeClassifier(alpha=alpha,
                                       fit_intercept=fit_intercept,
                                       normalize=normalize,
                                       max_iter=max_iter,
                                       tol=tol,
                                       solver=solver,
                                       random_state=30)
    print_performance(reg,
                      X,
                      y,
                      model='Ridge Classifier',
                      scores=['accuracy'])
    reg.fit(X, y)
    return reg
Example #7
    def classification_analysis(self):

        tmp = dict()
        #linear
        tmp['logic'] = feature_selection.RFECV(lm.LogisticRegression(), cv=5, n_jobs = self.n_jobs).fit(self.x,self.y).ranking_
        tmp['ridge'] = feature_selection.RFECV(lm.RidgeClassifier(), cv=5, n_jobs = self.n_jobs).fit(self.x,self.y).ranking_
        tmp['SGD'] = feature_selection.RFECV(lm.SGDClassifier(), cv=5, n_jobs = self.n_jobs).fit(self.x,self.y).ranking_
        tmp['lm_svm'] = feature_selection.RFECV(svm.LinearSVC(), cv=5, n_jobs = self.n_jobs).fit(self.x,self.y).ranking_

        #non-linear
        tmp['ADABoost'] = feature_selection.RFECV(ensemble.AdaBoostClassifier(), cv=5, n_jobs = self.n_jobs).fit(self.x,self.y).ranking_
        tmp['RandomForest'] = feature_selection.RFECV(ensemble.RandomForestClassifier(), cv=5, n_jobs = self.n_jobs).fit(self.x,self.y).ranking_

        #stats
        chi = feature_selection.chi2(self.x,self.y)
        tmp['chi2'] = chi[0]
        tmp['chi2_pval'] = chi[1]
        fscore = feature_selection.f_classif(self.x,self.y)
        tmp['f_score'] = fscore[0]
        tmp['f_pval'] = fscore[1]
        tmp['MIC'] = feature_selection.mutual_info_classif(self.x,self.y)

        return tmp
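One plausible way to consume the returned dict (a sketch, not from the source): every value is a per-feature vector, so it tabulates directly; 'analysis' and 'feature_names' are assumed to exist in the caller's scope.

# Sketch: collect the per-feature rankings and statistics into one table.
import pandas as pd

results = analysis.classification_analysis()
summary = pd.DataFrame(results, index=feature_names)
print(summary.sort_values("ridge"))  # lower RFECV ranking = more important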
Example #8
def main():
    ## Load in the training and testing data
    train_df = pd.read_csv(os.path.join(DATA_DIR, "train.csv"), index_col="id")
    test_df = pd.read_csv(os.path.join(DATA_DIR, "test.csv"), index_col='id')

    count_vectorizer = feature_extraction.text.CountVectorizer()

    train_vectors = count_vectorizer.fit_transform(train_df["text"])
    test_vectors = count_vectorizer.transform(test_df["text"])

    clf = linear_model.RidgeClassifier()
    scores = model_selection.cross_val_score(clf,
                                             train_vectors,
                                             train_df["target"],
                                             cv=3,
                                             scoring="f1")
    print(scores)

    clf.fit(train_vectors, train_df["target"])
    save_model(clf, 'tutorial')
    preds_test = clf.predict(test_vectors)
    print(preds_test)
    create_submission("sample_submission.csv", preds_test, test_df)
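Hedged sketches of the two helpers main() calls but the excerpt omits; the real project versions may differ (paths and the "target" column are assumptions):

# Hypothetical helper definitions, for illustration only.
import joblib

def save_model(clf, name):
    joblib.dump(clf, name + ".joblib")

def create_submission(sample_name, preds, test_df):
    submission = pd.read_csv(os.path.join(DATA_DIR, sample_name))
    submission["target"] = preds
    submission.to_csv("submission.csv", index=False)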
Example #9
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

clf = linear_model.LogisticRegression(C=1.0, penalty='l1', tol=1e-6)
bagging_clf = BaggingClassifier(clf,
                                n_estimators=20,
                                max_samples=0.8,
                                max_features=1.0,
                                bootstrap=True,
                                bootstrap_features=False)
bagging_clf.fit(X_train, y_train)

treemodel = DecisionTreeClassifier()
#treemodel=BaggingClassifier(treemodel,n_estimators=20, max_samples=0.8, max_features=1.0, bootstrap=True, bootstrap_features=False)
treemodel.fit(X_train, y_train)

# Despite the original variable name "randomtree", this bags a RidgeClassifier.
ridge_bag = linear_model.RidgeClassifier()
ridge_bag = BaggingClassifier(ridge_bag,
                              n_estimators=20,
                              max_samples=0.8,
                              max_features=1.0,
                              bootstrap=True,
                              bootstrap_features=False)
ridge_bag.fit(X_train, y_train)

sgd = linear_model.SGDClassifier(tol=1e-3)
sgd = BaggingClassifier(sgd,
                        n_estimators=20,
                        max_samples=0.8,
                        max_features=1.0,
                        bootstrap=True,
                        bootstrap_features=False)
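# The excerpt stops before fitting the bagged SGD model; a plausible
# continuation (assumed, not original) fits it and compares held-out accuracy.
sgd.fit(X_train, y_train)
for name, est in [("bagged logistic", bagging_clf),
                  ("decision tree", treemodel),
                  ("bagged ridge", ridge_bag),
                  ("bagged SGD", sgd)]:
    print(name, est.score(X_test, y_test))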
Example #10
def run_classifier(path, model='logistic_regression', seven_features=False, filterSpO2=True, load_model=False):

    if model == 'multi_class':
        data, df = read_data(path, multi_class=True, seven_features=seven_features, fitlerSpO2=filterSpO2)
    else:
        data, df = read_data(path, seven_features=seven_features, fitlerSpO2=filterSpO2)

    if filterSpO2:
        subSpO2Folder = 'FilteredSpO2/'
    else:
        subSpO2Folder = 'NoSpO2Filtering/'

    if seven_features:
        n_features = 7
    else:
        n_features = 3

    plot_data_file = 'Plots/data/precision_recall_' + str(n_features) + 'Features_' + 'filtered_' + str(filterSpO2) + '.csv'
    if not os.path.exists(plot_data_file):
        with open(plot_data_file, 'w', newline='') as file:
            csvwriter = csv.writer(file)
            csvwriter.writerow(['model', 'precisions', 'recalls', 'thresholds'])

    print("-----------------------------------------------")
    if model == 'multi_class':
        print("        MULTI CLASS CLASSIFIER")
    elif model == 'svc':
        print("        SUPPORT VECTOR CLASSIFIER")
    elif model == 'mlp':
        print("   MULTI-LAYER PERCEPTRON")
    else:
        print("             LOGISTIC REGRESSION")
    print("-----------------------------------------------")


    X = data[:, 0:-1]
    y = data[:, -1]
    n_split = 10
    kFold = model_selection.KFold(n_splits=n_split, shuffle=True, random_state=1)
    f1_scores = []
    bic_scores = []
    i = 0
    res = np.zeros((8, n_split))
    tprs = []
    y_preds_prob_total = []  # hoisted above the fold loop so all folds accumulate
    y_trues = []

    prec_recall_curves = {'precision': [], 'recall': [], 'threshold': []}

    mean_fpr = np.linspace(0, 1, 100)
    for train_index, test_index in kFold.split(X, y):
        print('Fold:',i + 1)
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        X_train_scale, X_test_scale = normalize(X_train, X_test, seven_features=seven_features)
        print('x train:',X_train_scale.shape)
        if model == 'svc':
            model_name = 'SVC'
            classifier = svm.SVC(gamma='auto', kernel='rbf', probability=True)
        elif model == 'sgd':
            model_name = 'SGD Classifier'
            classifier = linear_model.SGDClassifier(loss='log')
        elif model == 'ridge':
            model_name = 'Ridge Classifier'
            classifier = linear_model.RidgeClassifier()
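            # NOTE: RidgeClassifier exposes decision_function but not
            # predict_proba, so the predict_proba call later in this loop would
            # raise for this branch; wrapping the estimator in
            # sklearn.calibration.CalibratedClassifierCV is one possible fix.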
        elif model == 'mlp':
            model_name = 'Neural Network'
            if seven_features:
                classifier = MLPClassifier((12, 8, 6, 4, 4), max_iter=200, activation='tanh', solver='adam', random_state=1, momentum=0.8) # current BEST 0.7798 accuracy
            else:
                classifier = MLPClassifier((6, 3), max_iter=200, activation='tanh', solver='adam', random_state=1, momentum=0.6) # current best

        else:
            model_name = 'Logistic Regression'
            classifier = linear_model.LogisticRegression()

        final_classifier = base.clone(classifier)
        if load_model: # load a pretrained model
            model_path = 'saved_models/classifier/' + subSpO2Folder + model_name.replace(' ', '_') + '_' + str(n_features) + '_features.joblib'
            print('load file from:', model_path)
            classifier = load(model_path)
        else: # train a new model
            classifier.fit(X_train_scale, y_train)

        y_pred = classifier.predict(X_test_scale)
        y_pred_prob = classifier.predict_proba(X_test_scale)[:,1]

        y_preds_prob_total.append(y_pred_prob)
        y_trues.append(y_test)

        f1_scores.append(metrics.f1_score(y_test, y_pred))
        tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
        res[0][i] = tp / (tp + fp) # PPV
        res[1][i] = tp / (tp + fn) # sensitivity
        res[2][i] = tn / (tn + fp) # specificity
        res[3][i] = tn / (fn + tn) # NPV
        roc_auc = roc_auc_score(y_test, y_pred_prob)
        res[4][i] = roc_auc
        res[5][i] = res[1][i] / (1 - res[2][i])  # positive likelihood ratio
        res[6][i] = (1 - res[1][i]) / res[2][i]  # negative likelihood ratio
        res[7][i] = metrics.accuracy_score(y_test, y_pred)
        bic_val = bic.bic(y_test, y_pred_prob, n_features)
        bic_scores.append(bic_val)
        aupr = metrics.average_precision_score(y_test, y_pred_prob)
        fpr, tpr, _ = metrics.roc_curve(y_test, y_pred_prob)

        tprs.append(interp(mean_fpr, fpr, tpr))
        #plt.plot(fpr, tpr, lw=2, alpha=0.3, label='ROC fold %d (AUC = %0.2f)' % (i, roc_auc))

        print('current f1:', f1_scores[-1], ' PPV:', res[0][i], ' Sensitivity:', res[1][i],
              ' Specificity:', res[2][i], ' NPV:', res[3][i], ' BIC:', bic_val,
              'ROAUC:',res[4][i], 'Accuracy:',res[7][i])
        i += 1

    # train a final model using entire set
    if not load_model:
        X, X_copy = normalize(X, X, seven_features=seven_features)
        final_classifier.fit(X, y)
        final_pred = final_classifier.predict(X)
        print('Final F1 ', metrics.f1_score(y, final_pred))
        # Create the full output directory, including the SpO2 subfolder,
        # so the dump below cannot fail on a missing path.
        out_dir = os.path.join('saved_models', 'classifier', subSpO2Folder)
        os.makedirs(out_dir, exist_ok=True)

        model_output_path = out_dir + model_name.replace(' ', '_') + '_' + str(n_features) + '_features.joblib'
        dump(final_classifier, model_output_path)

    res = np.mean(res, axis=1)

    print('\n=================================================')
    print('                      RESULT')
    print("--------------------------------------------------")
    print('  ', model_name)
    print('   Total cases:', y.shape)
    print('Seven Features:', seven_features, '  FilterSpO2:', filterSpO2)
    print('       Mean F1:', np.mean(np.array(f1_scores)))
    print('           PPV:', res[0])
    print('   Sensitivity:', res[1])
    print('   Specificity:', res[2])
    print('           NPV:', res[3])
    print('         ROAUC:', res[4])
    print('   Positive LR:', res[5])
    print('   Negative LR:', res[6])
    print('           BIC:', np.mean(np.array(bic_scores)))
    print('      Accuracy:', res[7])
    print('          AUPR:', aupr)
    print("\nBaseline")
    get_baseline(np.array([df['Spo2'], df['Fio2']]).T, y)
    print('==================================================')

    if model == 'multi_class':
        return

    precision_array, recall_array, thresholds = metrics.precision_recall_curve(
        np.concatenate(y_trues), np.concatenate(y_preds_prob_total))

    with open(plot_data_file, 'a+', newline='') as file:
        csvwriter = csv.writer(file)
        csvwriter.writerow([model_name, str(precision_array), str(recall_array), str(thresholds)])
Example #11
        regression(linear_model.OrthogonalMatchingPursuit()),
        regression(linear_model.OrthogonalMatchingPursuitCV()),
        regression(linear_model.Ridge(random_state=RANDOM_SEED)),
        regression(linear_model.RidgeCV()),
        regression(linear_model.BayesianRidge()),
        regression(linear_model.ARDRegression()),
        regression(linear_model.SGDRegressor(random_state=RANDOM_SEED)),
        regression(
            linear_model.PassiveAggressiveRegressor(random_state=RANDOM_SEED)),

        # Logistic Regression
        classification(
            linear_model.LogisticRegression(random_state=RANDOM_SEED)),
        classification(
            linear_model.LogisticRegressionCV(random_state=RANDOM_SEED)),
        classification(linear_model.RidgeClassifier(random_state=RANDOM_SEED)),
        classification(linear_model.RidgeClassifierCV()),
        classification(linear_model.SGDClassifier(random_state=RANDOM_SEED)),
        classification_binary(
            linear_model.LogisticRegression(random_state=RANDOM_SEED)),
        classification_binary(
            linear_model.LogisticRegressionCV(random_state=RANDOM_SEED)),
        classification_binary(
            linear_model.RidgeClassifier(random_state=RANDOM_SEED)),
        classification_binary(linear_model.RidgeClassifierCV()),
        classification_binary(
            linear_model.SGDClassifier(random_state=RANDOM_SEED)),

        # Decision trees
        regression(tree.DecisionTreeRegressor(**TREE_PARAMS)),
        regression(tree.ExtraTreeRegressor(**TREE_PARAMS)),
Example #12
def train(data, **kwargs):
    clf = lm.RidgeClassifier(**kwargs)
    clf.fit(data[:, :-1], data[:, -1])
    return clf
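A usage sketch for train (assumed, not from the original): it expects a 2-D array whose last column holds the labels.

# Hypothetical usage sketch; X and y are assumed feature/label arrays.
import numpy as np

data = np.column_stack([X, y])
clf = train(data, alpha=1.0)
print(clf.score(data[:, :-1], data[:, -1]))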
Example #13

# Tail of an omitted helper; its full definition is not included in the excerpt:
#     return [y_clf_train, y_clf_test, acc_clf_train,
#             acc_clf_test, loss_clf_train, loss_clf_test]
def get_classifier_results():
    return pandas.DataFrame({'classifier':classifier_list,
                             'classifier_name':classifier_names,
                             'clf_dataset':clf_datasets,
                             'acc_train':acc_train,'acc_test':acc_test,
                             'loss_train':loss_train,'loss_test':loss_test})

classifier_list,classifier_names,clf_datasets=[],[],[]
acc_train,acc_test,loss_train,loss_test=[],[],[],[]
df_list=['classifier_name','acc_train','acc_test','loss_train','loss_test']
clf=[linear_model.LogisticRegression(solver='liblinear',multi_class='ovr'),
     linear_model.LogisticRegressionCV(solver='liblinear',multi_class='ovr'),
     linear_model.SGDClassifier(max_iter=1000,tol=0.00001),
     linear_model.RidgeClassifier(),linear_model.RidgeClassifierCV(),
     LinearDiscriminantAnalysis(),QuadraticDiscriminantAnalysis(),
     svm.LinearSVC(),svm.SVC(gamma='scale',C=10.0,kernel='poly'),
     svm.NuSVC(gamma='scale',kernel='poly'),
     KNeighborsClassifier(),RadiusNeighborsClassifier(radius=30),
     NearestCentroid(),
     DecisionTreeClassifier(),ExtraTreeClassifier(),GaussianNB(),
     BernoulliNB(),MultinomialNB(),
     BaggingClassifier(),RandomForestClassifier(n_estimators=64),
     AdaBoostClassifier(),GradientBoostingClassifier(),
     linear_model.Perceptron(max_iter=1000,tol=0.00001),
     linear_model.PassiveAggressiveClassifier(max_iter=1000,tol=0.00001),
     GaussianProcessClassifier(),LabelPropagation(),LabelSpreading()]

list3clf=['LogisticRegression','LogisticRegressionCV','SGDClassifier',
          'RidgeClassifier', 'RidgeClassifierCV',
Example #14
def ridge_classifiers():
    ridge = OneVsRestClassifier(linear_model.RidgeClassifier())
    return ridge
Example #15
def ridgeclassifier(self):
    clf = linear_model.RidgeClassifier()
    return clf
Example #16
def __init__(self, X, Y, alpha=2):
    super(RidgeClassifier, self).__init__(X, Y)
    self.alpha = alpha
    self.classifier = linear_model.RidgeClassifier(alpha=self.alpha)
Example #17
feature_selection_performance = []

# Classification (without feature selection):
print('Without feature selection')
X = data.iloc[:, 1:34]
y = data.iloc[:, 0]
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=42)
no_selection_performance = []

print('Ridge')
lin_reg = linear_model.RidgeClassifier(alpha=1000,
                                       fit_intercept=True,
                                       normalize=False,
                                       solver='lsqr',
                                       tol=1e-2)
lin_reg.fit(X_train, y_train)
y_test_pred = lin_reg.predict(X_test)
matrix = confusion_matrix(y_test, y_test_pred)
score = lin_reg.score(X_test, y_test)
no_selection_performance.append(('Ridge', score, matrix))

print('SGD')
sgdClassifier = linear_model.SGDClassifier(fit_intercept=True,
                                           loss='log',
                                           max_iter=1000,
                                           penalty='l1',
                                           shuffle=False,
                                           tol=0.01)
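# The excerpt cuts off here; by analogy with the Ridge block above, the
# continuation presumably fits and scores the SGD model (sketch, not original):
sgdClassifier.fit(X_train, y_train)
y_test_pred = sgdClassifier.predict(X_test)
no_selection_performance.append(('SGD',
                                 sgdClassifier.score(X_test, y_test),
                                 confusion_matrix(y_test, y_test_pred)))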
Example #18
XGB_C = xgb.XGBClassifier(  # name and call head reconstructed; the excerpt begins mid-call
    subsample=1,
    colsample_bytree=1, 
    colsample_bylevel=1, 
    colsample_bynode=1, 
    reg_alpha=0, 
    reg_lambda=1, 
    scale_pos_weight=1, 
    base_score=0.5, 
    random_state=0, 
    missing=None)

Ridge_C = linear_model.RidgeClassifier(
    alpha=1.0, 
    fit_intercept=True, 
    normalize=False, 
    copy_X=True, 
    max_iter=None, 
    tol=0.001, 
    class_weight=None, 
    solver='auto', 
    random_state=None)

LogisticR_C = linear_model.LogisticRegression(
    penalty='l2', 
    dual=False, 
    tol=0.0001, 
    C=1.0, 
    fit_intercept=True, 
    intercept_scaling=1, 
    class_weight=None, 
    random_state=None, 
    solver='lbfgs', 
Example #19
import math
import warnings

from sklearn import linear_model
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.utils.extmath import safe_sparse_dot

warnings.filterwarnings("ignore", category=FutureWarning)

linkF = 'D:/DATA/CODE/GraduationProject/Features_Data/'
case = 'blur'
dirTrain = linkF
dirTest = linkF
dataPre.loadDataTrain(dirTrain, case)
dataPre.loadDataTest(dirTest, case)
matTrain, matVal, labelTrain, labelVal = train_test_split(dataPre.matTrain,
                                                          dataPre.labelTrain,
                                                          test_size=0.2,
                                                          random_state=1)
dataPre.FindMaxMin(matTrain)
dataPre.StandardData(matTrain)
dataPre.StandardData(matVal)
dataPre.StandardData(dataPre.matTest)

# Despite the original variable name "logreg", this is a ridge classifier.
ridge_clf = linear_model.RidgeClassifier(alpha=0.1)
# If num_Features = 9, set alpha = 70;
# elif num_Features = 11, set alpha = 33.
ridge_clf.fit(matTrain, labelTrain)
print(case.upper())
labelVal_pred = ridge_clf.predict(matVal)
print("Accuracy: %.2f %%" % (100 * accuracy_score(labelVal, labelVal_pred)))
labelTest_pred = ridge_clf.predict(dataPre.matTest)
print("Accuracy: %.2f %%" %
      (100 * accuracy_score(dataPre.labelTest, labelTest_pred)))
Example #20

# Tail of an omitted preprocessing loop (note that as written, DataFrame.drop
# without inplace=True has no effect):
#     df_1325_red.drop(columns=i)


x_train=dateex(df_1325_red,start_date = '03-10-2008',end_date = '06-01-2012')[X_col_red].drop(columns='daily_return')
y_train=dateex(df_1325_red,start_date = '03-10-2008',end_date = '06-01-2012')['sign_daily_return']
dates_train=dateex(df_1325_red,start_date = '03-10-2008',end_date = '06-01-2012')['date']
x_test=dateex(df_1325_red,start_date = '08-01-2012',end_date = '06-01-2014')[X_col_red].drop(columns='daily_return')
y_test=dateex(df_1325_red,start_date = '08-01-2012',end_date = '06-01-2014')['sign_daily_return']
dates=dateex(df_1325_red,start_date = '08-01-2012',end_date = '06-01-2014')['date']

"""Fit ridge regression wih optimised alpha."""

from sklearn import linear_model
from sklearn import metrics
alpha_good=0.5
reg = linear_model.RidgeClassifier(alpha=0.5)
reg.fit(x_train,y_train)
y_train_hat=reg.predict(x_train)
w_old=metrics.mean_squared_error(y_train, y_train_hat)
for x in range(10000):
    reg = linear_model.RidgeClassifier(alpha=x*0.0001)
    reg.fit(x_train,y_train)
    y_train_hat=reg.predict(x_train)
    w= metrics.mean_squared_error(y_train, y_train_hat)
    if w<w_old:
        alpha_good=x*0.0001
        w_old=w
best_alpha_reg = linear_model.RidgeClassifier(alpha=alpha_good)
best_alpha_reg.fit(x_train,y_train)
y_train_hat_rid=best_alpha_reg.predict(x_train)
print('In-sample Mean Squared Error:', w_old)
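For reference, the 10,000-fit loop above can be replaced by a cross-validated grid search. A sketch (GridSearchCV is a swapped-in technique, not the author's method), keeping the same alpha grid but excluding the degenerate alpha = 0 and scoring by cross-validation instead of in-sample MSE:

import numpy as np
from sklearn.model_selection import GridSearchCV

search = GridSearchCV(linear_model.RidgeClassifier(),
                      param_grid={"alpha": np.arange(1, 10000) * 0.0001},
                      scoring="neg_mean_squared_error",
                      cv=5)
search.fit(x_train, y_train)
print("best alpha:", search.best_params_["alpha"])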
Example #21
    ## rotation matrix (head reconstructed; the excerpt began mid-expression)
    R = np.array([[np.cos(theta), -np.sin(theta)],
                  [np.sin(theta), np.cos(theta)]])

    xx, yy = np.dot(R, [xx, yy])
    ## scaling
    xx /= max(np.absolute(xx))
    yy /= max(np.absolute(yy))
    ## assign into X
    X[row, ::2] = xx
    X[row, 1::2] = yy

## Split off a test set for the later 'accuracy' and 'confusion matrix' computations
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.1, stratify=y)

## CREATE THE CLASSIFIER OBJECT
clf = linear_model.RidgeClassifier(alpha=0.14026845637583893,
                                   fit_intercept=False)

## CROSS-VALIDATION
scores = model_selection.cross_validate(clf,
                                        X_train,
                                        y_train,
                                        return_estimator=True,
                                        n_jobs=-1)
print('The score array for test scores on each cv split:',
      scores['test_score'])
print('Mean of above:', scores['test_score'].mean())

## PICK THE BEST ESTIMATOR AND PREDICT FOR ALL THE DATA
best_clf = scores['estimator'][np.argmax(scores['test_score'])]
print('Accuracy on final set:', best_clf.score(X_test, y_test))
Example #22
# Call head reconstructed (the excerpt begins mid-call); the "selection" grid
# below implies a Lasso-type estimator:
lassoc = GridSearchCV(estimator=linear_model.Lasso(),
                      param_grid={
                          "fit_intercept": [True, False],
                          "selection": ["cyclic", "random"]
                      },
                      cv=3)
lassoc.fit(x_train, y_train)

print("    [ ] Algorithm 2: Least Angle Regression classifier...")
larsc = GridSearchCV(estimator=linear_model.Lars(copy_X=True),
                     param_grid={"fit_intercept": [True, False]},
                     cv=3)
larsc.fit(x_train, y_train)

print("    [ ] Algorithm 3: Ridge regression classifier...")
rrc = GridSearchCV(
    estimator=linear_model.RidgeClassifier(),
    param_grid={
        "solver":
        ["auto", "svd", "cholesky", "lsqr", "sparse_cg", "sag", "saga"],
        "fit_intercept": [True, False]
    },
    cv=3)
rrc.fit(x_train, y_train)

print("    [ ] Algorithm 4: Stochastic Gradient Descent classifier...")
sgdc = GridSearchCV(estimator=linear_model.SGDClassifier(tol=0.001,
                                                         max_iter=1000,
                                                         n_jobs=-1),
                    param_grid={
                        "loss":
                        ["hinge", "log", "modified_huber", "perceptron"],
Example #23
        res += ", C = " + str(params[0]) + ", gamma = " + str(params[1])
    if name == POLY:
        res += ", C = " + str(params[0]) + ", gamma = " + str(
            params[1]) + ", degree = " + str(params[2])
    if name == NN:
        res += ", k = " + str(params[0])
    return res


if __name__ == "__main__":  # for parallelism under windows

    # Build Classifiers

    for C in penalties:
        # Add Ridge Classifier (One vs Rest approach)
        classifiers[RIDGE, (C, )] = linear_model.RidgeClassifier(alpha=C)

        for gamma in bandwidths:
            # Add RBF SVM Classifiers (One vs One approach)
            classifiers[RBF, (C, gamma)] = svm.SVC(
                kernel='rbf',
                C=C,  # regularization -> SVM
                gamma=gamma,  # bandwidth
                decision_function_shape='ovo')

            # # Add Laplacian SVM Classifiers (One vs One approach)             # always giving Accuracy = 0.34349763744093603 -> almost random
            # classifiers[LAP, (C, gamma)] = svm.SVC(                           # need to turn off parallelism
            #     kernel=lambda X,Y: laplacian_kernel(X,Y, gamma),
            #     C=C,
            #     decision_function_shape='ovo')
Example #24
# Tail of an omitted Dash layout helper:
#     return dbc.Row([dbc.Col(title, md=8), dbc.Col(link, md=4)])


# CONSTANTS
numeric_cols = ['Age', 'Infrared Scan Results', 'Loading']

# LOAD DATA
spreadsheets = pd.read_excel('./data.xlsx', sheet_name=list(range(5)))
df = pd.concat(spreadsheets.values()).drop(columns=["ID", 'Heath Index'])
df.columns = df.columns.str.strip()

# CREATE ENCODER AND MODELS
oh_enc = OneHotEncoder(sparse=False)
models = {
    'Ridge': linear_model.RidgeClassifier(),
    'Logistic (L-BFGS)': linear_model.LogisticRegression(),
    'Logistic (SAGA)': linear_model.LogisticRegression(solver='saga'),
    'SGD': linear_model.SGDClassifier(),
}

# PREPROCESS DATASET
X = np.hstack(
    [oh_enc.fit_transform(df[['Visual Conditions']]), df[numeric_cols].values])
y = df['Oil Leak'].values
X_train, X_test, y_train, y_test = train_test_split(X, y)

# TRAIN MODELS
for name, model in models.items():
    model.fit(X_train, y_train)
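# A natural follow-up (assumed, not in the source): score each fitted model
# on the held-out split.
for name, model in models.items():
    print(f"{name}: {model.score(X_test, y_test):.3f}")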
Example #25
**3. Split the sample into training and test sets using the train_test_split method of the model_selection library in a 70/30 proportion, with random_state = 1**
"""

data_df = data[["Total day charge", "Customer service calls"]].copy() #Отбираем анализируемые колонки
labels_df = data["Churn"].copy() #Отбираем результирующую колонку

data_list = data_df.to_numpy().tolist() #Приведение к списку списков
data_labels = labels_df.to_numpy().tolist() #Приведение к списку

train_data, test_data, train_labels, test_labels = model_selection.train_test_split(data_list, data_labels, 
                                                                                    test_size = 0.3,
                                                                                    random_state = 1)

"""**4. Создайте объект ridge classifier, настройте его на обучающей выборке и примените его к тестовым данным. Посмотрите на результат. Почему мы получили константную модель?**"""

ridge_classifier = linear_model.RidgeClassifier(random_state = 1)

ridge_classifier.fit(train_data, train_labels)

ridge_predictions = ridge_classifier.predict(test_data)
print(ridge_predictions)

"""*Анализ результата:* Полученная констатная модель, выдающая 0 значение при любой комбинации признаков "Total day charge" и "Customer service calls" свидетельствует об отсутствии зависимости между ними и результирующим признаком ухода пользователя от оператора ("Churn"), подтверждая выводы, полученные графически.

**5. Повторите п.4 для логистической регрессии, для тестовых данных рассчитайте вероятность отнесения объекта к каждому классу**
"""

log_regressor = linear_model.LogisticRegression(random_state = 1)

log_regressor.fit(train_data, train_labels)
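# Step 5 above also asks for per-class probabilities; the excerpt is truncated,
# but the continuation would presumably be:
log_predictions = log_regressor.predict(test_data)
log_probabilities = log_regressor.predict_proba(test_data)  # shape: (n_test, 2)
print(log_predictions)
print(log_probabilities)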
Example #26
    print('predict test 3')
    testlabels = testlabeling(testdata, testouts, betinterval, testres3)
    countTAs, countwins, pwin1, moneynow, perhour = AItester(testdata, testlabels, predicted, initmoney, bet, payrate, 1)

else:
    pwin = 0

pwinmax = pwin
if testmode != 1:
    print('current max prob:', pwinmax)
    for i in range(0, 1000):
        print(i)
        if noAI == 1:
            print('now fitting...')
            if fitSVC == 1:
                clf = linear_model.RidgeClassifier(alpha=1e-15, copy_X=False, tol=1e-16)
                #clf = svm.LinearSVC(decision_function_shape='ovr', verbose=1)
                clf.fit(traindata, labels.ravel())
            elif fitSVC == 2:
                clf = ensemble.BaggingClassifier(ExtraTreeClassifier(), n_estimators=numDTC)
                clf.fit(traindata, labels.ravel())
            elif fitSVC == 3:
                clf = neighbors.KNeighborsClassifier(n_neighbors=20)
                clf.fit(traindata, labels.ravel())
            else:
                clf = DecisionTreeClassifier(max_depth=25, min_samples_leaf=10)
                clf.fit(traindata, labels)
            print('done!')
        else:
            clf = joblib.load(outname)
Example #27
def get_skl_estimator(self, **default_parameters):
    return linear_model.RidgeClassifier(**default_parameters)
Example #28

def predict(train_list, train_result, test_list, method_list, **kwargs):
    def fit_predict_each_output(model, target):
        __predict_result = []
        for idx in range(np.size(target, 1)):
            model.fit(train_list, target[:, idx])
            __predict_result.append(model.predict(test_list))
        return np.transpose(np.asarray(__predict_result))

    def fit_predict(model, target):
        model.fit(train_list, target)
        return model.predict(test_list)

    from_bins_idx = kwargs["from_bins_idx"]
    to_bins_idx = kwargs["to_bins_idx"]
    _binned_train_result = to_bins_idx(train_result)

    _predict_result = []
    if "current" in method_list:
        rbm = neural_network.BernoulliRBM(n_components=512, verbose=False, n_iter=100, learning_rate=1e-2, random_state=0)
        rbm.fit(train_list)
        rbm.fit(test_list)
        _predict_result.append(np.transpose(np.asarray(__predict_result)))
    elif "knn" in method_list:
        _ = knn_predict(train_list, _binned_train_result, test_list, k=kwargs["k"])
        _predict_result.append(from_bins_idx(np.asarray(_, dtype=int)))
    elif "dt" in method_list:
        _ = fit_predict(tree.DecisionTreeClassifier(max_depth=kwargs["max_depth"]), _binned_train_result)
        _predict_result.append(from_bins_idx(np.asarray(_, dtype=int)))
    elif "rf" in method_list:
        _ = fit_predict(ensemble.RandomForestClassifier(n_estimators=kwargs["n_estimators"], max_depth=kwargs["max_depth"], n_jobs=kwargs["n_jobs"]), _binned_train_result)
        _predict_result.append(from_bins_idx(np.asarray(_, dtype=int)))
    elif "average" in method_list:
        _ = average_predict(train_result, test_list)
        _predict_result.append(from_bins_idx(np.asarray(_, dtype=int)))
    elif "adaboost" in method_list:
        _ = fit_predict_each_output(ensemble.AdaBoostClassifier(), _binned_train_result)
        _predict_result.append(from_bins_idx(np.asarray(_, dtype=int)))
    elif "ridge" in method_list:
        _ = fit_predict_each_output(linear_model.RidgeClassifier(), _binned_train_result)
        _predict_result.append(from_bins_idx(np.asarray(_, dtype=int)))
    elif "linear" in method_list:
        _predict_result.append(fit_predict_each_output(linear_model.LinearRegression(), train_result))
    elif "huber" in method_list:
        _predict_result.append(fit_predict_each_output(linear_model.HuberRegressor(), train_result))
    elif "theilsen" in method_list:
        _predict_result.append(fit_predict_each_output(linear_model.TheilSenRegressor(), train_result))
    elif "lasso" in method_list:
        _predict_result.append(fit_predict_each_output(linear_model.Lasso(), train_result))
    elif "par" in method_list:
        _predict_result.append(fit_predict_each_output(linear_model.PassiveAggressiveRegressor(C=kwargs["par_C"], epsilon=kwargs["par_eps"]), train_result))
    elif "ridge_reg" in method_list:
        _predict_result.append(fit_predict_each_output(linear_model.Ridge(), train_result))
    elif "dt_reg" in method_list:
        _predict_result.append(fit_predict(tree.DecisionTreeRegressor(max_depth=kwargs["max_depth"]), train_result))
    elif "rf_reg" in method_list:
        _predict_result.append(fit_predict(ensemble.RandomForestRegressor(max_depth=kwargs["max_depth"], n_jobs=kwargs['n_jobs'], n_estimators=kwargs['n_estimators']), train_result))
    elif "xgboost" in method_list:
        _predict_result.append(fit_predict_each_output(xgb.XGBClassifier(max_depth=kwargs["max_depth"], n_estimators=kwargs['n_estimators'], nthread=kwargs["nthread"]), _binned_train_result))
    elif "xgboost_reg" in method_list:
        _predict_result.append(fit_predict_each_output(xgb.XGBRegressor(max_depth=kwargs["max_depth"], n_estimators=kwargs['n_estimators'], nthread=kwargs["nthread"]), train_result))
    elif "svr" in method_list:
        _predict_result.append(fit_predict_each_output(svm.SVR(C=kwargs["C"], epsilon=kwargs["epsilon"]), train_result))
    elif "linear_svr" in method_list:
        _predict_result.append(fit_predict_each_output(svm.LinearSVR(C=kwargs["C"], epsilon=kwargs["epsilon"]), train_result))
    else:
        assert False, "invalid method"
    return np.asarray(_predict_result)
Example #29
print("First 5 rows in test data:")
print(test_df['text'].head(5))
print("First 5 rows in test data - cleaned:")
print(test_df['text_clean'].head(5))

# Apply Count Vectorizer
count_vect = CountVectorizer(analyzer=text_utils.clean_text)

# create vectors for all training tweets
train_vect = count_vect.fit_transform(train_df["text"])
# create vectors for all test tweets
test_vect = count_vect.transform(test_df["text"])

# build a linear model for classification using Ridge regression
clf = linear_model.RidgeClassifier()

scores = model_selection.cross_val_score(clf,
                                         train_vect,
                                         train_df["target"],
                                         cv=3,
                                         scoring="accuracy")
# [0.714342   0.65602837 0.69846275]
# scores = model_selection.cross_val_score(clf, train_vect, train_df["target"], cv=3, scoring="f1")
# [0.59878251 0.53089737 0.60949464]
print(scores)

# fit on the train data and predict labels for the test tweets
clf.fit(train_vect, train_df["target"])

sample_submission = pd.read_csv("dataset/sample_submission.csv")
Example #30

def getEngines(self):
    return [(svm.LinearSVC(), 'linear-svm'),
            (svm.SVC(kernel='rbf'), 'rbf-svm'),
            (ensemble.RandomForestClassifier(), 'random-forest'),
            (linear_model.RidgeClassifier(alpha=2.0), 'ridge-regression')]
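A hypothetical driver for getEngines (everything here is an assumption: the owning instance, X, and y):

# Hypothetical usage sketch: cross-validate each engine.
from sklearn import model_selection

engines = EngineProvider()  # hypothetical owner of getEngines
for estimator, name in engines.getEngines():
    scores = model_selection.cross_val_score(estimator, X, y, cv=5)
    print(name, scores.mean())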