Example #1
import matplotlib.pyplot as plt

import utilities

# Load the multivariate data (X: features, y: labels)
X, y = utilities.load_data('data_multivar.txt')

plt.title('Input data')

###############################################
# Train test split and SVM training
from sklearn import model_selection
from sklearn.svm import SVC

X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.25, random_state=5)

# params = {'kernel': 'linear'}
# params = {'kernel': 'poly', 'degree': 3}
params = {'kernel': 'rbf'}
classifier = SVC(**params)
classifier.fit(X_train, y_train)
utilities.plot_classifier(classifier, X_train, y_train, 'Training dataset')

y_test_pred = classifier.predict(X_test)
utilities.plot_classifier(classifier, X_test, y_test, 'Test dataset')

###############################################
# Evaluate classifier performance

from sklearn.metrics import classification_report

target_names = ['Class-' + str(int(i)) for i in sorted(set(y))]
print("\n" + "#" * 30)
print("\nClassifier performance on training dataset\n")
print(
    classification_report(y_train,
                          classifier.predict(X_train),
                          target_names=target_names))

print("\n" + "#" * 30)
print("\nClassifier performance on test dataset\n")
print(
    classification_report(y_test, y_test_pred,
                          target_names=target_names))
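These examples rely on a utilities module (load_data, plot_classifier) that isn't reproduced on this page. As a rough sketch only, assuming plot_classifier shades the model's decision regions over a mesh grid, it might look like the following; the actual helper in the source project may differ:

import numpy as np
import matplotlib.pyplot as plt

def plot_classifier(classifier, X, y, title='Classifier boundaries', annotate=False):
    # Define the plotting range with a small margin around the data
    x_min, x_max = X[:, 0].min() - 1.0, X[:, 0].max() + 1.0
    y_min, y_max = X[:, 1].min() - 1.0, X[:, 1].max() + 1.0

    # Evaluate the classifier on a dense mesh grid
    step = 0.01
    xx, yy = np.meshgrid(np.arange(x_min, x_max, step),
                         np.arange(y_min, y_max, step))
    mesh_output = classifier.predict(np.c_[xx.ravel(), yy.ravel()])
    mesh_output = mesh_output.reshape(xx.shape)

    # Shade the decision regions and overlay the datapoints
    # (the real helper presumably also labels points when annotate=True)
    plt.figure()
    plt.title(title)
    plt.pcolormesh(xx, yy, mesh_output, cmap=plt.cm.gray)
    plt.scatter(X[:, 0], X[:, 1], c=y, s=80, edgecolors='black',
                linewidth=1, cmap=plt.cm.Paired)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())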
Example #2
# params = {'kernel': 'linear'}

# Building a nonlinear classifier using SVMs

# Using a polynomial function
# params = {'kernel': 'poly', 'degree': 3}

# Using a radial basis function
params = {'kernel': 'rbf'}

classifier = SVC(**params)
classifier.fit(X_train, y_train)
# utilities.plot_classifier(classifier, X_train, y_train, 'Training dataset')

y_test_pred = classifier.predict(X_test)
# Evaluate with the model trained on X_train; refitting on X_test here
# would leak the test data into the model.
utilities.plot_classifier(classifier, X_test, y_test, 'Test dataset')

# Evaluate classifiers performances
from sklearn.metrics import classification_report

target_names = ['Class-' + str(int(i)) for i in sorted(set(y))]
print('#'*30 + '\n')
print('\nClassifier Performance on Training Dataset\n')
print(classification_report(y_train, classifier.predict(X_train), target_names=target_names))
print('#'*30 + '\n')

print('#'*30 + '\n')
print('\nClassifier Performance on Test Dataset\n')
print(classification_report(y_test, classifier.predict(X_test), target_names=target_names))
print('#'*30 + '\n')
Example #3
import numpy as np
import matplotlib.pyplot as plt

import utilities
from sklearn.svm import SVC
from sklearn import model_selection

X, y = utilities.load_data('data_multivar.txt')

X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.25, random_state=5)

params = {'kernel': 'rbf'}
classifier = SVC(**params)
classifier.fit(X_train, y_train)

###############################################
# Measure distance from the boundary

input_datapoints = np.array([[2, 1.5], [8, 9], [4.8, 5.2], [4, 4], [2.5, 7],
                             [7.6, 2], [5.4, 5.9]])
print "\nDistance from the boundary:"
for i in input_datapoints:
    print i, '-->', classifier.decision_function(i)[0]

# Confidence measure
params = {'kernel': 'rbf', 'probability': True}
classifier = SVC(**params)
classifier.fit(X_train, y_train)
print "\nConfidence measure:"
for i in input_datapoints:
    print i, '-->', classifier.predict_proba(i)[0]

utilities.plot_classifier(classifier, input_datapoints,
                          [0] * len(input_datapoints), 'Input datapoints',
                          True)
plt.show()
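With probability=True, SVC fits a Platt-scaled logistic calibration on top of the raw decision values, so the probabilities are derived from, but not identical to, the signed distances. A small sanity check relating the two, assuming the probabilistic classifier just fitted above:

# For binary SVC, the positive-class probability grows monotonically
# with the signed distance from the boundary (Platt scaling).
for point in input_datapoints:
    p = point.reshape(1, -1)
    print(p[0],
          'decision =', classifier.decision_function(p)[0],
          'proba =', classifier.predict_proba(p)[0])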
plt.figure()
plt.scatter(class_0[:,0], class_0[:,1], facecolors='black', edgecolors='black', marker='s')
plt.scatter(class_1[:,0], class_1[:,1], facecolors='None', edgecolors='black', marker='s')
plt.title('Input data')

###############################################
# Train test split
from sklearn import model_selection

X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.25, random_state=5)

params = {'kernel': 'linear'}
# params = {'kernel': 'linear', 'class_weight': 'balanced'}
classifier = SVC(**params)
classifier.fit(X_train, y_train)
utilities.plot_classifier(classifier, X_train, y_train, 'Training dataset')

y_test_pred = classifier.predict(X_test)
utilities.plot_classifier(classifier, X_test, y_test, 'Test dataset')

###############################################
# Evaluate classifier performance

from sklearn.metrics import classification_report

target_names = ['Class-' + str(int(i)) for i in sorted(set(y))]
print("\n" + "#" * 30)
print("\nClassifier performance on training dataset\n")
print(classification_report(y_train, classifier.predict(X_train), target_names=target_names))
print("#" * 30 + "\n")
input_file = 'data_multivar.txt'
X, y = utilities.load_data(input_file)

###############################################
# Train test split
from sklearn import model_selection

X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.25, random_state=5)

params = {'kernel': 'rbf'}
classifier = SVC(**params)
classifier.fit(X_train, y_train)

###############################################
# Measure distance from the boundary

input_datapoints = np.array([[2, 1.5], [8, 9], [4.8, 5.2], [4, 4], [2.5, 7], [7.6, 2], [5.4, 5.9]])
print "\nDistance from the boundary:"
for i in input_datapoints:
    print i, '-->', classifier.decision_function(i)[0]

# Confidence measure
params = {'kernel': 'rbf', 'probability': True}
classifier = SVC(**params)
classifier.fit(X_train, y_train)
print "\nConfidence measure:"
for i in input_datapoints:
    print i, '-->', classifier.predict_proba(i)[0]

utilities.plot_classifier(classifier, input_datapoints, [0] * len(input_datapoints), 'Input datapoints', True)
plt.show()
plt.figure()
plt.scatter(class_0[:, 0], class_0[:, 1], facecolors='black',
            edgecolors='black', marker='s')
plt.scatter(class_1[:, 0], class_1[:, 1], facecolors='none',
            edgecolors='black', marker='s')
plt.title('Input data')
plt.show()

from sklearn import model_selection
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.25, random_state=5)
'''The class_weight parameter counts the number of datapoints in each class
and adjusts the weights so that the imbalance doesn't adversely affect
performance.'''
'''probability=True enables probability estimates (via Platt scaling).'''
params = {'kernel': 'linear', 'class_weight': 'balanced', 'probability': True}
classifier = SVC(**params, gamma='auto')
classifier.fit(X_train, y_train)
utilities.plot_classifier(classifier, X_train, y_train, 'Training dataset')
plt.show()
'''C is a hyperparameter that determines the penalty for the incorrect
classification of an observation, so we use per-class weights to manage
unbalanced classes. In this way, we assign a new value of C to each class,
defined as follows:
    C(i) = C * w(i)
where C is the penalty, w(i) is a weight inversely proportional to class i's
frequency, and C(i) is the C value for class i. This method increases the
penalty for misclassifying the less represented classes so as to prevent them
from being outclassed by the most represented class. In the scikit-learn
library, when using SVC, we can set the values of C(i) automatically by
setting class_weight='balanced'.'''
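The w(i) values that class_weight='balanced' produces can be inspected directly with scikit-learn's compute_class_weight, which applies the same formula. A small illustration with made-up class counts (the 90/10 split below is hypothetical):

import numpy as np
from sklearn.utils.class_weight import compute_class_weight

# Hypothetical imbalanced labels: 90 samples of class 0, 10 of class 1
y_demo = np.array([0] * 90 + [1] * 10)

# 'balanced' computes w(i) = n_samples / (n_classes * count(i))
weights = compute_class_weight('balanced', classes=np.unique(y_demo), y=y_demo)
print(weights)  # [0.5555... 5.0] -> the rare class gets the larger weight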

print("Confidence measure:")
for i in class_0:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

# Specify L1 regularization (the liblinear solver supports the l1 penalty)
lr = LogisticRegression(penalty='l1', solver='liblinear')

# Instantiate the GridSearchCV object and run the search
searcher = GridSearchCV(lr, {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000]})
searcher.fit(X_train, y_train)

# Report the best parameters
print("Best CV params", searcher.best_params_)

# Find the number of nonzero coefficients (selected features)
best_lr = searcher.best_estimator_
coefs = best_lr.coef_
print("Total number of features:", coefs.size)
print("Number of selected features:", np.count_nonzero(coefs))
plot_classifier(X_train, y_train, searcher, proba=True)

# Get predicted probabilities
proba = searcher.predict_proba(X_train)

# Sort the example indices by their maximum probability
proba_inds = np.argsort(np.max(proba, axis=1))


# Function to display the digit image at the given index
def show_digit(index):
    plt.gray()
    plt.matshow(digits.images[index])
    plt.show()
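
For instance, the indices at the two ends of proba_inds can be fed back to show_digit to inspect the least and most confident predictions (assuming digits is the dataset object loaded earlier in this example):

# proba_inds is sorted in ascending order of maximum probability, so the
# first entry is the least confident example and the last the most confident.
show_digit(proba_inds[0])   # least confident prediction
show_digit(proba_inds[-1])  # most confident prediction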

Example #8
File: svm_confidence.py  Project: Roc-J/SVM
import numpy as np
import matplotlib.pyplot as plt

import utilities
from sklearn.svm import SVC
from sklearn import model_selection

filename = 'data_multivar.txt'
X, y = utilities.load_data(filename)

print('-----svm--------')
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.25, random_state=5)

# params = {'kernel': 'rbf'}
params = {'kernel': 'rbf', 'probability': True}
classifier = SVC(**params)
classifier.fit(X_train, y_train)

utilities.plot_classifier(classifier, X_train, y_train, 'Training dataset')
plt.show()

########################
# measure distance from the boundary
input_datapoints = np.array([[2, 1.5], [8, 9], [4.8, 5.2], [4, 4], [2.5, 7],
                             [7.6, 2], [5.4, 5.9]])
print "\n Distance from the boundary"
for i in input_datapoints:
    # print i, '-->', classifier.decision_function(i.reshape(1, -1))[0]
    print i, '-->', classifier.predict_proba(i.reshape(1, -1))[0]

utilities.plot_classifier(classifier, input_datapoints,
                          [0] * len(input_datapoints), 'Input datapoints',
                          True)
plt.show()