Example #1
# aliases assumed from earlier in the original script (not shown in this
# excerpt): t_t_s is sklearn.model_selection.train_test_split, XGBC is
# xgboost.XGBClassifier, and Norm is a preprocessing scaler such as
# sklearn.preprocessing.Normalizer
print(Y.shape)

# normalize X
scaler = Norm().fit(X)
rescaledX = scaler.transform(X)

# split into train and test sets using t_t_s
# because we combined the datasets to apply uniform
# one-hot and label encoding, we set the 'shuffle' parameter to False
# we also know that there should be 15060 rows in the test set
test_set_size = test_dataset_nomissing.shape[0]
print('\n test_set_size...')
print(test_set_size)
X_train, X_test, Y_train, Y_test = t_t_s(rescaledX,
                                         Y,
                                         test_size=test_set_size,
                                         random_state=seed,
                                         shuffle=False)
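
# sanity check (added for illustration, not in the original excerpt): with
# shuffle=False the final test_set_size rows form the test set, so X_test
# should have 15060 rows
print('\n split shapes...')
print(X_train.shape, X_test.shape)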

# instantiate XGBC class using defaults
model = XGBC()

# fit model to training datasets
print('\n training the model...')
model.fit(X_train, Y_train)

# view trained model
print('\n model...')
print(model)

# make predictions for test data
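# a minimal sketch of the step announced above; accuracy_score is an assumed
# import (from sklearn.metrics import accuracy_score), not shown in this excerpt
predictions = model.predict(X_test)
print('\n test accuracy...')
print(accuracy_score(Y_test, predictions))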
Example #2
dataset = dataset.drop(['fil', 'status'], axis=1)

# separate into X and Y
X = dataset.iloc[:, :17]
Y = dataset.iloc[:, 17]
X = X.values
Y = Y.values
print(Y.shape)
# encode string class labels as integers (LE is assumed to be
# sklearn.preprocessing.LabelEncoder)
encoder = LE()
encoder.fit(Y)
Y = encoder.transform(Y)
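
# optional illustration (not in the original excerpt): inspect the label
# mapping the encoder learned
print('\n encoder classes...')
print(encoder.classes_)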

# split off the validation rows; validation_size and seed are defined
# earlier in the original script
X_train, X_test, Y_train, Y_test = t_t_s(X,
                                         Y,
                                         test_size=validation_size,
                                         random_state=seed,
                                         shuffle=False)

# instantiate XGBC class using defaults
model = XGBC()

# fit model to training datasets
print('\n training the model...')
model.fit(X_train, Y_train)

# view trained model
print('\n model...')
print(model)

# make predictions for test data
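# a minimal sketch of the prediction step (accuracy_score is an assumed
# import); inverse_transform maps the integer predictions back to the
# original string labels
predictions = model.predict(X_test)
print('\n test accuracy...')
print(accuracy_score(Y_test, predictions))
print(encoder.inverse_transform(predictions[:5]))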
Example #3
# -*- coding: utf-8 -*-
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split as t_t_s
from sklearn.naive_bayes import GaussianNB as GNB
from sklearn.decomposition import PCA
from sklearn.mixture import GaussianMixture as GM
from sklearn.metrics import accuracy_score as a_s

df = sns.load_dataset('iris')
x = df.drop('species', axis=1)
y = df['species']

xtr, xte, ytr, yte = t_t_s(x, y, test_size=0.25, random_state=0)
print(xtr.shape, yte.shape)  # (112, 4) (38,)
model = GNB()
model.fit(xtr, ytr)
ypred = model.predict(xte)
print("分类准确率:{0:.2%}".format(a_s(yte, ypred)))

# dimensionality reduction
pca = PCA(n_components=2)
new_x = pca.fit_transform(x)
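
# optional check (added for illustration): how much of the variance the two
# principal components retain
print(pca.explained_variance_ratio_)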
xtr_new, xte_new, ytr_new, yte_new = t_t_s(new_x,
                                           y,
                                           test_size=0.25,
                                           random_state=0)
print(xtr_new.shape, yte_new.shape)  # (112, 2) (38,)
model1 = GNB()
model1.fit(xtr_new, ytr_new)
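
# a short evaluation sketch (not in the original excerpt): compare the
# PCA-reduced model against the full-feature baseline above
ypred_new = model1.predict(xte_new)
print("Classification accuracy (2 PCA components): {0:.2%}".format(
    a_s(yte_new, ypred_new)))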
Example #4
# this excerpt starts mid-script: 'new' is assumed to be a 2-D embedding of
# digits.data (e.g. from PCA or a manifold learner), 'digits' to be
# sklearn.datasets.load_digits(), and RFC to be
# sklearn.ensemble.RandomForestClassifier
print(new.shape)
sns.set(style='whitegrid')
plt.figure()
plt.scatter(new[:, 0],
            new[:, 1],
            c=digits.target,
            cmap=plt.cm.get_cmap('Spectral', 10),
            edgecolor='none',
            alpha=0.6)
plt.colorbar(label='Digits', ticks=range(10), extend='both')
plt.clim(-0.5, 9.5)

# classification
model = RFC(n_estimators=400)
xtr, xte, ytr, yte = t_t_s(digits.data,
                           digits.target,
                           test_size=0.2,
                           random_state=0)
model.fit(xtr, ytr)
ypred = model.predict(xte)
fig, ax = plt.subplots(10,
                       10,
                       figsize=(14, 10),
                       subplot_kw={
                           'xticks': [],
                           'yticks': []
                       },
                       gridspec_kw=dict(hspace=0.1, wspace=0.1))
for i, axi in enumerate(ax.flat):
    axi.imshow(xte.reshape(-1, 8, 8)[i], cmap='binary')
    axi.text(0.05,
             0.05,