import utility import numpy from sklearn.datasets import fetch_20newsgroups from sklearn.svm import SVC from sklearn.grid_search import GridSearchCV from sklearn.linear_model import LogisticRegression from sklearn import metrics # Load the dataset docs_train, docs_test = utility.custom_2class_classifier() #SVM -> svm = SVC(kernel='linear', probability=True, random_state=40) pipeline_svm = utility.pipeline_setup(svm) #pipeline_svm obj to be used in all svm algos pipeline_svm_fitted = pipeline_svm.fit(docs_train.data, docs_train.target) # svm_predict = pipeline_svm_fitted.predict(docs_test.data) # utility.print_stats(docs_test.target, svm_predict, 'SVM Normal') # utility.draw_roc_curve(docs_test.target, pipeline_svm_fitted.predict_proba(docs_test.data)[:, 1]) #Soft margin SVM -> #confirm this part, not sure of any other way to implement soft margin SVM params = { 'learning_algo__gamma': [1e-3, 1e3] #10^-3 to 10^3 } svm_soft_margin = GridSearchCV(pipeline_svm, params, cv=5) svm_soft_margin_fitted = svm_soft_margin.fit(docs_train.data, docs_train.target) svm_soft_margin_predict = svm_soft_margin_fitted.predict(docs_test.data) utility.print_stats(docs_test.target, svm_soft_margin_predict, 'Soft Margin SVM') utility.draw_roc_curve(docs_test.target, svm_soft_margin_fitted.predict_proba(docs_test.data)[:, 1]) best_params = svm_soft_margin.best_estimator_.get_params() for param_name in sorted(params.keys()):
import utility import numpy from sklearn.datasets import fetch_20newsgroups from sklearn.svm import SVC from sklearn.grid_search import GridSearchCV from sklearn.linear_model import LogisticRegression from sklearn import metrics # Load the dataset docs_train, docs_test = utility.custom_2class_classifier() #SVM -> svm = SVC(kernel='linear', probability=True, random_state=40) pipeline_svm = utility.pipeline_setup( svm) #pipeline_svm obj to be used in all svm algos pipeline_svm_fitted = pipeline_svm.fit(docs_train.data, docs_train.target) # svm_predict = pipeline_svm_fitted.predict(docs_test.data) # utility.print_stats(docs_test.target, svm_predict, 'SVM Normal') # utility.draw_roc_curve(docs_test.target, pipeline_svm_fitted.predict_proba(docs_test.data)[:, 1]) #Soft margin SVM -> #confirm this part, not sure of any other way to implement soft margin SVM params = { 'learning_algo__gamma': [1e-3, 1e3] #10^-3 to 10^3 } svm_soft_margin = GridSearchCV(pipeline_svm, params, cv=5) svm_soft_margin_fitted = svm_soft_margin.fit(docs_train.data, docs_train.target) svm_soft_margin_predict = svm_soft_margin_fitted.predict(docs_test.data) utility.print_stats(docs_test.target, svm_soft_margin_predict, 'Soft Margin SVM') utility.draw_roc_curve(
# Multiclass Naive Bayes: fit a GaussianNB pipeline on four 20-newsgroups
# categories and print statistics for the held-out test subset.
from sklearn.datasets import fetch_20newsgroups
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics
import utility

categories = [
    'comp.sys.ibm.pc.hardware',
    'comp.sys.mac.hardware',
    'misc.forsale',
    'soc.religion.christian',
]

# Options shared by the train and test fetches. Header/footer/quote stripping
# stays disabled, as in the original:
#   remove=('headers', 'footers', 'quotes')
fetch_options = dict(categories=categories, shuffle=True, random_state=42)
docs_train = fetch_20newsgroups(subset='train', **fetch_options)
docs_test = fetch_20newsgroups(subset='test', **fetch_options)

# Wrap the classifier in the project's standard pipeline and train it.
model = utility.pipeline_setup(GaussianNB())
model.fit(docs_train.data, docs_train.target)
# print(model)

# Make predictions on the test documents and report against the true labels.
expected = docs_test.target
predicted = model.predict(docs_test.data)
utility.print_stats(expected, predicted, 'Naive Bayes Multiclass')
# Gather the news/price rows of every tracked company into a single frame.
# The original chained ``goog.append(...)`` calls and threw the results away.
# DataFrame.append returns a new object instead of mutating, so only the
# Google rows were ever used.
# NOTE(review): assumes utility.get_news_prices returns a pandas DataFrame
# (it is used through ``goog.message.apply`` below) - confirm.
import pandas  # imported here because this section's file header is not visible

companies = ['google', 'microsoft', 'apple', 'yahoo', 'adobe', 'ford']
# Row labels are kept as returned by each call, matching what the append chain
# would have produced; pass ignore_index=True if a unique index is needed.
goog = pandas.concat([utility.get_news_prices(company) for company in companies])

# Select model of computation:
# model = neural_network.MLPRegressor([len(verbnet.classids()), 200, 8], 'relu', 'adam', 0.0001, 200, 'constant', 0.001, 0.5, 200,
#                                     True, None, 0.0001, False, False, 0.9, True, False, 0.1, 0.9, 0.999, 1e-08)
# model = RandomForestRegressor(n_estimators=50, max_features=30, max_depth=9, n_jobs=1)
model = SVC(kernel='linear', probability=True, random_state=40)
# model = linear_model.LinearRegression()
model = utility.pipeline_setup(model)
# model_fitted = model.fit(goog['message'], goog['Threshold Change'])

# Select columns:
# Each message gets a trailing period before being passed to
# utility.get_feature_vector; element [0][0] of the result is kept.
x = goog.message.apply(lambda sentence: utility.get_feature_vector(sentence + ".")[0][0])
# x.to_csv('data/google_msg_id.csv')
# x = pandas.read_csv('data/google_msg_id.csv')
# print x
# x = goog['message']
# x = x.apply(lambda i: utility.one_hot(i))
# array = numpy.zeros((len(x), len(verbnet.classids())))
# for results in range(len(x)):
#     for i in x[results]:
#         # print i
#         array[results][i] = 1
# Basic Naive Bayes: fit a GaussianNB pipeline on the train/test split from
# utility.custom_2class_classifier(), then print statistics and draw a ROC curve.
from sklearn.datasets import fetch_20newsgroups
import matplotlib.pyplot as pyplot
from sklearn.naive_bayes import GaussianNB
import utility

docs_train, docs_test = utility.custom_2class_classifier()

# Wrap the classifier in the project's standard pipeline and train it.
classifier = GaussianNB()
model = utility.pipeline_setup(classifier)
model_fitted = model.fit(docs_train.data, docs_train.target)
# print(model)

# Make predictions and report them against the true test labels.
expected = docs_test.target
predicted = model_fitted.predict(docs_test.data)
utility.print_stats(expected, predicted, 'Naive Bayes Basic')

# The ROC curve uses column 1 of the predicted probabilities.
test_probabilities = model_fitted.predict_proba(docs_test.data)
utility.draw_roc_curve(expected, test_probabilities[:, 1])
] docs_train = fetch_20newsgroups(subset='train', categories=categories, shuffle=True, random_state=42) docs_test = fetch_20newsgroups(subset='test', categories=categories, shuffle=True, random_state=42) svm_basic = SVC(kernel='linear', class_weight='balanced', probability=True, random_state=40) svm_onerest = OneVsRestClassifier(svm_basic) pipeline_svm_onerest = utility.pipeline_setup(svm_onerest) pipeline_svm_fitted = pipeline_svm_onerest.fit(docs_train.data, docs_train.target) svm_predict = pipeline_svm_fitted.predict(docs_test.data) utility.print_stats(docs_test.target, svm_predict, 'SVM OneVSOne') svm_weighted = SVC( kernel='linear', class_weight='balanced', probability=True, random_state=40 ) #balanced param to make sure both docs have same no. of samples in onevsone svm_oneone = OneVsOneClassifier(svm_weighted) pipeline_svm_oneone = utility.pipeline_setup(svm_oneone) pipeline_svm_fitted = pipeline_svm_oneone.fit(docs_train.data, docs_train.target)
# Multiclass linear SVM on four 20-newsgroups categories, evaluated with both
# the one-vs-rest and the one-vs-one decomposition.
from sklearn import metrics
from sklearn.datasets import fetch_20newsgroups
from sklearn.multiclass import OneVsOneClassifier, OneVsRestClassifier
from sklearn.svm import SVC

import utility

categories = [
    'comp.sys.ibm.pc.hardware',
    'comp.sys.mac.hardware',
    'misc.forsale',
    'soc.religion.christian',
]
docs_train = fetch_20newsgroups(subset='train', categories=categories, shuffle=True, random_state=42)
docs_test = fetch_20newsgroups(subset='test', categories=categories, shuffle=True, random_state=42)

# One-vs-rest linear SVM.
svm_basic = SVC(kernel='linear', class_weight='balanced', probability=True, random_state=40)
svm_onerest = OneVsRestClassifier(svm_basic)
pipeline_svm_onerest = utility.pipeline_setup(svm_onerest)
pipeline_svm_fitted = pipeline_svm_onerest.fit(docs_train.data, docs_train.target)

svm_predict = pipeline_svm_fitted.predict(docs_test.data)
# The original printed the two result sets under each other's labels; each
# label now names the strategy that produced the predictions.
utility.print_stats(docs_test.target, svm_predict, 'SVM OneVSRest')

# One-vs-one linear SVM.
# class_weight='balanced' weights each class inversely to its frequency, so
# the smaller class in a pairwise problem is not swamped. It reweights the
# penalty and does not change the number of samples.
svm_weighted = SVC(kernel='linear', class_weight='balanced', probability=True, random_state=40)
svm_oneone = OneVsOneClassifier(svm_weighted)
pipeline_svm_oneone = utility.pipeline_setup(svm_oneone)
pipeline_svm_fitted = pipeline_svm_oneone.fit(docs_train.data, docs_train.target)

svm_predict = pipeline_svm_fitted.predict(docs_test.data)
utility.print_stats(docs_test.target, svm_predict, 'SVM OneVSOne')