Exemplo n.º 1
0
def cross_validation(df, mapper):
    """Cross-validate a linear regression on the ``Sales`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data; must contain a ``Sales`` column used as the target.
    mapper : transformer
        Feature-extraction step (e.g. a DataFrameMapper) run before the
        linear model.
    """
    pipe = sklearn.pipeline.Pipeline([
        ('featurized', mapper),
        ('lm', sklearn.linear_model.LinearRegression())
    ])
    # df.copy() keeps the mapper from mutating the caller's frame.
    result = cross_val_score(pipe, df.copy(), df.Sales)
    # Bug fix: the original was ``print(np.round(result), 2)`` -- it rounded
    # to 0 decimals and printed a stray literal 2. Round to 2 places instead.
    print(np.round(result, 2))
def test_with_car_dataframe(cars_dataframe):
    """Vectorized car descriptions should let a linear SVM beat 30% accuracy."""
    steps = [
        ("preprocess", DataFrameMapper([("description", CountVectorizer())])),
        ("classify", SVC(kernel="linear")),
    ]
    pipeline = Pipeline(steps)
    features = cars_dataframe.drop("model", axis=1)
    target = cars_dataframe["model"]
    cv_scores = cross_val_score(pipeline, features, target)
    assert cv_scores.mean() > 0.30
Exemplo n.º 3
0
def test_with_car_dataframe(cars_dataframe):
    """The description column, bag-of-words encoded, should give an SVM
    enough signal to exceed 30% mean cross-validated accuracy."""
    text_features = DataFrameMapper([("description", CountVectorizer())])
    model = Pipeline([("preprocess", text_features),
                      ("classify", SVC(kernel='linear'))])
    X = cars_dataframe.drop("model", axis=1)
    y = cars_dataframe["model"]
    result = cross_val_score(model, X, y)
    assert result.mean() > 0.30
Exemplo n.º 4
0
def test_with_iris_dataframe(iris_dataframe):
    """Identity-mapped iris measurements should let a linear SVM classify
    the species nearly perfectly with low variance across folds."""
    columns = [
        "petal length (cm)",
        "petal width (cm)",
        "sepal length (cm)",
        "sepal width (cm)",
    ]
    mapper = DataFrameMapper([(name, None) for name in columns])
    pipeline = Pipeline([("preprocess", mapper),
                         ("classify", SVC(kernel='linear'))])
    data = iris_dataframe.drop("species", axis=1)
    labels = iris_dataframe["species"]
    scores = cross_val_score(pipeline, data, labels)
    assert scores.mean() > 0.96
    assert scores.std() * 2 < 0.04
def test_with_iris_dataframe(iris_dataframe):
    """Passing the four iris features through unchanged should support a
    near-perfect, stable linear SVM."""
    feature_map = DataFrameMapper([
        ("petal length (cm)", None),
        ("petal width (cm)", None),
        ("sepal length (cm)", None),
        ("sepal width (cm)", None),
    ])
    clf = Pipeline([
        ("preprocess", feature_map),
        ("classify", SVC(kernel='linear')),
    ])
    X = iris_dataframe.drop("species", axis=1)
    y = iris_dataframe["species"]
    cv_results = cross_val_score(clf, X, y)
    assert cv_results.mean() > 0.96
    assert cv_results.std() * 2 < 0.04
Exemplo n.º 6
0
def logistic_001():
    """Score every feature individually with a univariate logistic regression.

    Returns
    -------
    list of (int, numpy.ndarray)
        One ``(column_index, cv_scores)`` pair per feature column.
        (Bug fix: the original discarded ``scores`` entirely.)
    """
    X, y = classes.get_train_data()
    y = y > 0  # binarize the target for classification

    remove_object = classes.RemoveObjectColumns()
    X = remove_object.fit_transform(X)

    imputer = Imputer()
    X = imputer.fit_transform(X)
    scores = []

    for i in range(X.shape[1]):
        clf = LogisticRegression()
        # Bug fixes: X[:, i] is 1-D but sklearn estimators require a 2-D
        # feature matrix, so slice to keep the column axis; 'roc' is not a
        # valid scorer name -- the AUC scorer is 'roc_auc'.
        s = cross_val_score(clf, X[:, i:i + 1], y, scoring='roc_auc')
        scores.append((i, s))
    return scores
Exemplo n.º 7
0
def cross_validation(df, mapper):
    """Print 2-decimal cross-validation scores of a linear model on Sales.

    ``df`` must carry a ``Sales`` column (the regression target); ``mapper``
    is the feature-extraction step placed ahead of the regressor. ``df`` is
    copied so the mapper cannot mutate the caller's frame.
    """
    pipe = sklearn.pipeline.Pipeline([
        ('featurized', mapper), ('lm', sklearn.linear_model.LinearRegression())
    ])
    result = cross_val_score(pipe, df.copy(), df.Sales)
    # Bug fix: ``print(np.round(result), 2)`` rounded to 0 decimals and
    # printed a stray 2; the intent was rounding to 2 decimal places.
    print(np.round(result, 2))
    ("one_hot_encoding", one_hot_encoding(categorical_features)),
    ("imputer", Imputer(axis=0, strategy='median')),
    ("random_forest", OneVsOneClassifier(RandomForestClassifier()))
])

kfold = KFold(n_splits=5, shuffle=True)
model = pipe_1.fit(x_train, y_train)
# model_file_path = '/Users/Aniket/Appzen/myenv/Source/semanticzen/learned_models/random_forest_baseline.pkl'
# joblib.dump(model, model_file_path)
# print '\n model : {0}'.format(model)
# print '\n Model is dumped to : {0}'.format(model_file_path)

scores = cross_val_score(
    model,  # steps to convert raw messages into models
    x_train,  # training data
    y_train,  # training labels
    cv=kfold,  # split data randomly into 10 parts: 9 for training, 1 for scoring
    scoring='accuracy',  # which scoring metric?
    n_jobs=-1,  # -1 = use all cores = faster
)

print '\n Train result : cross_validation'
print '\n Mean : {0}, std : (+/-) {1}'.format(scores.mean(), scores.std())

trained_model = model.steps[3][1]
print '\n trained_model : {0}'.format(trained_model)

y_prediction = model.predict(x_test)
report = classification_report(y_test, y_prediction)
print '\n ---------- Classification Report ------------'
print report
Exemplo n.º 9
0
def crossval():
    """Print 5-fold cross-validation summary statistics.

    Relies on the module-level globals ``pipe``, ``X_train`` and ``y_train``.
    """
    results = cross_val_score(pipe, X_train, y_train, cv=5)
    print("Cross Validation Scores are: ", results.round(3))
    print("Mean CrossVal score is: ", round(results.mean(), 3))
    print("Std Dev CrossVal score is: ", round(results.std(), 3))
Exemplo n.º 10
0
# NOTE(review): fragment -- ``table_y``, ``table``, ``mapper`` and the
# test_results_* variables are defined outside the visible span.
# ``.iteritems()`` is the pandas<0.21 / Python 2 spelling of ``.items()``.
for i, v in table_y.iteritems():
    print("\t" + i + " : " + repr(v))
# Class distribution of the soil-class target column.
table_y = table['SOILCLASS'].value_counts()
print("Dataset features a total of " + repr(len(table_y)) + " soil classes.")
for i, v in table_y.iteritems():
    print("\t" + i + " : " + repr(v))

print("Training and evaluating classifier through 10-fold cross-validation...")
# NOTE(review): the XGBClassifier below is constructed and then immediately
# overwritten by the RandomForestClassifier -- dead assignment, only the
# random forest is actually used.
classifier = XGBClassifier(n_estimators=100, n_jobs=5)
classifier = sklearn.ensemble.RandomForestClassifier(n_estimators=1000,
                                                     n_jobs=5)
pipe = sklearn.pipeline.Pipeline([('featurize', mapper),
                                  ('classify', classifier)])
# The custom scorer prints a per-fold classification report and returns the
# accuracy, so ``aux`` holds the 10 per-fold accuracies.
aux = cross_val_score(
    pipe,
    X=table,
    y=table.SOILCLASS,
    scoring=make_scorer(classification_report_with_accuracy_score),
    cv=10)
print("Overall results...")
print("Accuracy : " + repr(aux.mean()))
classification_report_with_accuracy_score(test_results_y_true,
                                          test_results_y_pred)

print("Training classification model on complete dataset...")
# Presumably mapper output column 0 is the target and columns 1: are the
# features -- TODO confirm against the mapper definition.
train_data = mapper.fit_transform(table)
classifier.fit(train_data[0:train_data.shape[0], 1:train_data.shape[1]],
               train_data[0:train_data.shape[0], 0])
joblib.dump(classifier, 'classification-model.joblib')

print("Infering the feature ranking within the classification model...")
# NOTE(review): the body of this ``if`` is truncated past the visible span.
if isinstance(classifier, XGBClassifier):
Exemplo n.º 11
0
def classifiers_comparison():
    """Compare several spam classifiers via ROC curves on the full mail set.

    For each (name, estimator, params) entry, wraps the estimator in an
    AntispamModel, cross-validates it with a custom ROC scorer, and plots the
    resulting curve twice: a full ROC view and a zoomed view of the
    high-TPR / low-FPR corner. Saves the figure to doc/charts/ROC_ALL.png.

    NOTE(review): uses pre-0.18 sklearn API (``StratifiedKFold(labels, 5)``)
    and indexes ``dict.values()[0]`` -- Python 2 only; on Python 3 that line
    would need ``list(...)[0]``.
    """
    classifiers = [
        # (display name, estimator, grid of pipeline params to set)
        ("Regresja logistyczna",
         LogisticRegression(),
         {'classifier__C': 5.0}),

        ("Naiwny klas. bayesowski",
         MultinomialNB(),
         {'classifier__alpha': 0.1}),

        ("SVM (liniowy)",
         SVC(kernel='linear', probability=True),
         {'classifier__C': 3.5,
          'features__text_words': 500, 'features__subject_words': 50}),

        ("SVM (RBF)",
         SVC(kernel='rbf', probability=True),
         {'classifier__C': 0.5, 'classifier__gamma': 0.1,
          'features__text_words': 500, 'features__subject_words': 50}),

        ("Las drzew losowych",
         RandomForestClassifier(),
         {'classifier__n_estimators': 100}),
    ]

    clf_count = len(classifiers)

    train_mails = parse_mails(COMPLETE_ALL['filename'])
    train_labels = COMPLETE_ALL['label']
    plt.figure(figsize=(8, 12))
    # One line style/color pair per classifier so curves stay distinguishable.
    for (clf_name, clf, params), (ls, lc) in zip(classifiers,
                                                 linestyles_gen()):
        model = AntispamModel(clf)
        model.spam_filter.set_params(**params)
        cv = StratifiedKFold(train_labels, 5)
        # The scorer accumulates interpolated ROC points across folds as a
        # side effect of cross_val_score; its return value is unused here.
        scorer = ROCScorer(params.keys())
        cross_val_score(model.spam_filter, train_mails, train_labels,
                        cv=cv, scoring=scorer, verbose=2)
        score = scorer.interp_scores.values()[0]
        label = clf_name
        # Draw the same curve on both subplots (full and zoomed views).
        plt.subplot(2, 1, 1)
        score.plot(label=label, lc=lc, ls=ls, fill_alpha=0.5 / clf_count)
        plt.subplot(2, 1, 2)
        score.plot(label=label, lc=lc, ls=ls, fill_alpha=0.5 / clf_count)
    # Top subplot: full ROC, with a dashed box marking the zoom region.
    plt.subplot(2, 1, 1)
    plt.grid(True)
    plt.xlabel('FPR')
    plt.ylabel('TPR')
    plt.legend(loc='lower right', fontsize='medium')
    plt.gca().add_patch(
        plt.Rectangle((0, 0.8), 0.2, 0.2, ls='dashed', fc='none')
    )
    plt.xlim(-0.05, 1)
    plt.ylim(0, 1.05)
    # Bottom subplot: zoom into FPR <= 0.2, TPR >= 0.8.
    plt.subplot(2, 1, 2)
    plt.grid(True)
    plt.xlabel('FPR')
    plt.ylabel('TPR')
    plt.xlim(0, 0.2)
    plt.ylim(0.8, 1)
    plt.savefig('doc/charts/ROC_ALL.png')
    plt.show()
Exemplo n.º 12
0
 def cv_score(self, train_data, labels):
     """Cross-validate ``self.spam_filter`` on the given data, scored by F1.

     NOTE(review): ``score_func=`` was removed from cross_val_score in
     sklearn 0.18; the modern equivalent is ``scoring='f1'`` (or
     ``scoring=make_scorer(f1_score)``). Left as-is for the old API.
     """
     return cross_val_score(self.spam_filter, train_data, labels,
                            score_func=f1_score)
Exemplo n.º 13
0
# Baseline: plain linear regression on log1p-transformed targets.
# NOTE(review): ``normalize=True`` was deprecated and later removed from
# sklearn's LinearRegression; newer versions need an explicit scaler.
model = LinearRegression(normalize=True)

model.fit(X_train,np.log1p(y_train))
# Invert the log1p transform; np.exp(x) - 1 is equivalent to np.expm1(x).
pred = np.exp(model.predict(X_test))-1

from sklearn.metrics import mean_absolute_error

print(mean_absolute_error(y_test,pred))

from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.linear_model import Ridge
# Ridge with grid-searched alpha; nested CV (outer cv=5, inner cv=3) gives
# an unbiased RMSE estimate on the log scale.
pipe_ridge = make_pipeline(preprocessing_features, Ridge())
param_grid = {'ridge__alpha' : [0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50, 100]}
pipe_ridge_gs = GridSearchCV(pipe_ridge, param_grid=param_grid, scoring = 'neg_mean_squared_error', cv=3)
result = np.sqrt(-cross_val_score(pipe_ridge_gs, X_train, np.log1p(y_train), scoring = 'neg_mean_squared_error', cv = 5))
np.mean(result)

# Refit the grid search on the full training set and evaluate on the test split.
pipe_ridge_gs.fit(X_train, np.log1p(y_train))
predicted = np.exp(pipe_ridge_gs.predict(X_test)) -1

# Targets appear to be counts, hence the rounding -- TODO confirm.
predicted= predicted.round()
print(mean_absolute_error(y_test,predicted))

# Score the held-out competition file and write the submission.
df_TEST = pd.read_csv(path+file_test)
df_TEST.week_start_date = pd.to_datetime(df_TEST.week_start_date, yearfirst=True)

predicted_TEST = np.exp(pipe_ridge_gs.predict(df_TEST)) -1

pd.DataFrame(predicted_TEST).to_csv(path+'TEST.csv')
Exemplo n.º 14
0
# NOTE(review): fragment -- the opening of the ``nn`` pipeline (an MLP by the
# look of these keyword arguments) was cut off above; these lines close its
# constructor call. Original comments translated from Chinese.
                                         alpha=0.0001,
                                         learning_rate='adaptive',
                                         learning_rate_init=0.001,
                                         max_iter=1000))])
 # Fit the model.
 nn.fit(data, label)
 # Predict on the test split.
 nn_predict = nn.predict(X_test)
 # Model evaluation.
 # Basic accuracy score.
 nn_score = nn.score(X_test, y_test)
 print(nn_score)
 # Cross-validation on both the train and the test split (10 folds each).
 nn_cross1 = cross_val_score(nn,
                             X_train,
                             y_train,
                             scoring='accuracy',
                             cv=10,
                             n_jobs=-1)
 nn_cross2 = cross_val_score(nn,
                             X_test,
                             y_test,
                             scoring='accuracy',
                             cv=10,
                             n_jobs=-1)
 print(nn_cross1)
 print(nn_cross2)
 # #     scores1.append(nn_cross1.mean())
 # #     scores2.append(nn_cross2.mean())
 # #     print(nn_cross1.mean())
 # #     print(nn_cross2.mean())
 # # plt.plot(scores1, linestyle='-', color='r', label='train')
    ('UHDICM30', None), ('UHDICM40', None), ('LHDICM00', None),
    ('LHDICM10', None), ('LHDICM20', None), ('LHDICM30', None),
    ('LHDICM40', None), ('CRFVOL00', None), ('CRFVOL10', None),
    ('CRFVOL20', None), ('CRFVOL30', None), ('CRFVOL40', None),
    ('SNDPPT00', None), ('SNDPPT10', None), ('SNDPPT20', None),
    ('SNDPPT30', None), ('SNDPPT40', None), ('SLTPPT00', None),
    ('SLTPPT10', None), ('SLTPPT20', None), ('SLTPPT30', None),
    ('SLTPPT40', None), ('CLYPPT00', None), ('CLYPPT10', None),
    ('CLYPPT20', None), ('CLYPPT30', None),
    ('CLYPPT40', None), ('BLD00', None), ('BLD10', None), ('BLD20', None),
    ('BLD30', None), ('BLD40', None), ('PHIHOX00', None), ('PHIHOX10', None),
    ('PHIHOX20', None), ('PHIHOX30', None), ('PHIHOX40', None),
    ('PHIKCL00', None), ('PHIKCL10', None), ('PHIKCL20', None),
    ('PHIKCL30', None), ('PHIKCL40', None), ('ORCDRC00', None),
    ('ORCDRC10', None), ('ORCDRC20', None), ('ORCDRC30', None),
    ('ORCDRC40', None), ('CECSUM00', None), ('CECSUM10', None),
    ('CECSUM20', None), ('CECSUM30', None), ('CECSUM40', None)
])
table_y = table_y['WRB_2006_NAMEf_2'].value_counts()
print("Dataset features a total of " + repr(len(table_y)) + " soil classes.")
print("Training and evaluating classifier through 10-fold cross-validation...")
classifier = sklearn.ensemble.RandomForestClassifier(n_estimators=100)
#classifier = GCForest(get_gcforest_config())
pipe = sklearn.pipeline.Pipeline([('featurize', mapper),
                                  ('classify', classifier)])
cross_val_score(pipe,
                X=table,
                y=table.WRB_2006_NAMEf_2,
                scoring=make_scorer(classification_report_with_accuracy_score),
                cv=10)