Example #1
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
import utility

# train/test split via the same utility loader used in Example #2 below
docs_train, docs_test = utility.custom_2class_classifier()

# SVM ->
svm = SVC(kernel='linear', probability=True, random_state=40)
pipeline_svm = utility.pipeline_setup(svm)  # reused by all SVM variants below (sketched after this block)
pipeline_svm_fitted = pipeline_svm.fit(docs_train.data, docs_train.target)
# svm_predict = pipeline_svm_fitted.predict(docs_test.data)
# utility.print_stats(docs_test.target, svm_predict, 'SVM Normal')
# utility.draw_roc_curve(docs_test.target, pipeline_svm_fitted.predict_proba(docs_test.data)[:, 1])
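
# utility.pipeline_setup is not defined in this snippet. A minimal hypothetical
# sketch of what it presumably does, assuming a TF-IDF vectorizer feeding the
# classifier under a pipeline step named 'learning_algo' (the GridSearchCV
# parameter keys below depend on that step name):
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer

def pipeline_setup_sketch(classifier):
    return Pipeline([
        ('vectorizer', TfidfVectorizer()),  # raw documents -> TF-IDF features
        ('learning_algo', classifier),      # final estimator
    ])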

# Soft margin SVM ->
# SVC's soft-margin trade-off is set by the penalty parameter C (gamma is
# ignored by the linear kernel), so the grid search sweeps C; a simpler
# fixed-C alternative is sketched after the grid-search block below.
params = {
    'learning_algo__C': [10.0 ** k for k in range(-3, 4)]  # 10^-3 to 10^3
}
svm_soft_margin = GridSearchCV(pipeline_svm, params, cv=5)
svm_soft_margin_fitted = svm_soft_margin.fit(docs_train.data, docs_train.target)
svm_soft_margin_predict = svm_soft_margin_fitted.predict(docs_test.data)
utility.print_stats(docs_test.target, svm_soft_margin_predict, 'Soft Margin SVM')
utility.draw_roc_curve(docs_test.target, svm_soft_margin_fitted.predict_proba(docs_test.data)[:, 1])
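
# utility.draw_roc_curve is likewise external; a minimal hypothetical sketch,
# assuming it plots a standard ROC curve from true labels and positive-class
# probabilities:
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc

def draw_roc_curve_sketch(y_true, y_score):
    fpr, tpr, _ = roc_curve(y_true, y_score)
    plt.plot(fpr, tpr, label='AUC = {:.3f}'.format(auc(fpr, tpr)))
    plt.plot([0, 1], [0, 1], linestyle='--')  # chance diagonal
    plt.xlabel('False positive rate')
    plt.ylabel('True positive rate')
    plt.legend()
    plt.show()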

best_params = svm_soft_margin.best_estimator_.get_params()
for param_name in sorted(params.keys()):
    print("\t{}: {}".format(param_name, best_params[param_name]))
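
# Every SVC is already a soft-margin SVM, so a single fit with a hand-picked C
# (C=0.01 here is purely illustrative) is a simpler, hypothetical alternative
# to the grid search above:
svm_fixed_c = SVC(kernel='linear', C=0.01, probability=True, random_state=40)
pipeline_fixed_c = utility.pipeline_setup(svm_fixed_c).fit(docs_train.data,
                                                           docs_train.target)
utility.print_stats(docs_test.target,
                    pipeline_fixed_c.predict(docs_test.data),
                    'Soft Margin SVM (C=0.01)')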
              
# Logistic Regression ->
# from sklearn.linear_model import LogisticRegression
# note: max_iter=5 is very low and would likely stop before convergence
# logistic_regr = LogisticRegression(penalty='l2', max_iter=5, random_state=40)
# pipeline_regr = utility.pipeline_setup(logistic_regr)
# pipeline_regr_fitted = pipeline_regr.fit(docs_train.data, docs_train.target)
# regr_predict = pipeline_regr_fitted.predict(docs_test.data)
# utility.print_stats(docs_test.target, regr_predict, 'Logistic Regression')
# utility.draw_roc_curve(docs_test.target, pipeline_regr_fitted.predict_proba(docs_test.data)[:, 1])
Example #2
from sklearn.naive_bayes import GaussianNB
import utility

docs_train, docs_test = utility.custom_2class_classifier()

model = utility.pipeline_setup(GaussianNB())  # see the dense-input caveat below
model_fitted = model.fit(docs_train.data, docs_train.target)
#print(model)
# make predictions
expected = docs_test.target
predicted = model_fitted.predict(docs_test.data)
utility.print_stats(expected, predicted, 'Naive Bayes Basic')
utility.draw_roc_curve(expected, model_fitted.predict_proba(docs_test.data)[:, 1])
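
# Caveat: if utility.pipeline_setup vectorizes with TF-IDF (see the sketch in
# Example #1), GaussianNB will reject the sparse matrix it receives. A
# hypothetical densifying pipeline that sidesteps this:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer
from sklearn.feature_extraction.text import TfidfVectorizer

dense_nb = Pipeline([
    ('vectorizer', TfidfVectorizer()),
    ('densify', FunctionTransformer(lambda X: X.toarray(), accept_sparse=True)),
    ('learning_algo', GaussianNB()),
])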