# NOTE(review): fragment — `gd` is the grid-search object built just above this
# point in the full file; these two statements are the tail of that helper.
    gd.fit(X, Y)
    return gd
    #SVC(C=10, gamma=0.001, kernel='linear')


if __name__ == "__main__":
    # importing the datasets
    df = pd.read_csv(config.KFOLD_TRAIN_DATA)
    os_df = pd.read_csv(config.OVERSAMPLED_TRAIN_DATA)
    test_df = pd.read_csv(config.TEST_DATA)

    # calculate initial score without hyperparameter tuning
    # using stratified K-fold
    model = svm.SVC()
    intial_score = score(df, model)
    print(f"Intial roc_auc Score is : {intial_score}")

    # calculate score for test data
    # NOTE(review): `model` appears to be fitted only inside score()'s CV loop —
    # confirm it is fitted on the full training data before predicting here.
    x_test = test_df.drop('fraudulent', axis=1).values
    y_test = test_df.fraudulent.values
    y_pred = model.predict(x_test)
    test_score = metrics.roc_auc_score(y_test, y_pred)
    print(f'Intial Test score : {test_score}')

    # tune hyperparameters on the oversampled data and get the best parameters
    params = best_parameter(os_df)
    print(f"Best parameter are {params}")

    # training our training dataset on the best hyperparameter
# NOTE(review): fragment of a feature-selection + SVM script; `data` (dataset
# object), `training_data` (index list), `labels`, `l_f` and `filename` are
# defined earlier in the file.
training_data = [data.data[i] for i in training_data]
training_labels = [data.labels[i] for i in data.training_rows]
# NOTE(review): indexing rows by `labels` looks suspicious — presumably these
# are test-row indices; confirm how `labels` is built upstream.
test_data = [data.data[i] for i in labels]
print("\nWorking with %s, out file prefix %s" % (l_f, filename))
data = np.asarray(training_data)  # rebinds `data`, shadowing the dataset object
print("Data shape overall: ", data.shape)

# Rank features with an ExtraTrees ensemble and persist the importances.
clf = ExtraTreesClassifier()
clf = clf.fit(data, training_labels)
important_features = filename + ".important_features"
with open(important_features, "w") as f:
    for i in range(len(clf.feature_importances_)):
        f.write("%d\t%f\n" % (i, clf.feature_importances_[i]))

# Keep only features whose importance clears SelectFromModel's default threshold.
model = SelectFromModel(clf, prefit = True)
data_new = model.transform(data)
print("New data shape: ", data_new.shape)
print()
test_data = model.transform(test_data)

# Train an SVM on the reduced feature set and write "<label> <prediction>" rows.
svm_clf = svm.SVC()
svm_clf.fit(data_new, training_labels)
predictions = svm_clf.predict(test_data)
with open(filename + ".predict", "w") as f:
    for i in range(len(labels)):
        f.write("%d %d\n" % (labels[i], predictions[i]))
# (tail of make_meshgrid: return the coordinate matrices)
    return xx, yy


def plot_contours(ax, clf, xx, yy, **params):
    """Plot the decision boundaries of `clf` over the meshgrid (xx, yy).

    `params` is forwarded to matplotlib's contourf; returns the contour set.
    """
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    out = ax.contourf(xx, yy, Z, **params)
    return out


# Compare four SVM variants on the first two iris features.
iris = datasets.load_iris()
X = iris.data[:, :2]
y = iris.target
C = 1.0  # regularization parameter, shared by all models
models = (svm.SVC(kernel='linear', C=C),
          svm.LinearSVC(C=C),
          svm.SVC(kernel='rbf', gamma=0.7, C=C),
          svm.SVC(kernel='poly', degree=3, C=C))
models = (clf.fit(X, y) for clf in models)  # generator: each model fitted lazily
titles = ('SVC with linear kernel',
          'LinearSVC (linear kernel)',
          'SVC with RBF kernel',
          'SVC with polynomial (degree 3) kernel')
fig, sub = plt.subplots(2, 2)
plt.subplots_adjust(wspace=0.4, hspace=0.4)
X0, X1 = X[:, 0], X[:, 1]
xx, yy = make_meshgrid(X0, X1)
for clf, title, ax in zip(models, titles, sub.flatten()):
    plot_contours(ax, clf, xx, yy, cmap=plt.cm.coolwarm, alpha=0.8)
#clf = GaussianNB() ### Task 5: Tune your classifier to achieve better than .3 precision and recall ### using our testing script. Check the tester.py script in the final project ### folder for details on the evaluation method, especially the test_classifier ### function. Because of the small size of the dataset, the script uses ### stratified shuffle split cross validation. For more info: ### http://scikit-learn.org/stable/modules/generated/sklearn.cross_validation.StratifiedShuffleSplit.html # Example starting point. Try investigating other evaluation techniques! from sklearn import svm estimators = [('scaler', StandardScaler()), ('feature_selection', SelectKBest()), ('reducer', PCA(random_state=42)), ('svm', svm.SVC())] pipe = Pipeline(estimators) param_grid = ([{ 'feature_selection__k': [10, 13, 15, 'all'], 'reducer__n_components': [2, 4, 6, 8, 10], 'svm__C': np.logspace(-2, 3, 6), 'svm__gamma': np.logspace(-4, 1, 6), 'svm__class_weight': ['balanced', None], 'svm__kernel': ['rbf', 'sigmoid'] }]) grid_search = GridSearchCV(pipe, param_grid, scoring='precision', cv=sss) grid_search.fit(features, labels) #labels_predictions = grid_search.predict(features_test) clf = grid_search.best_estimator_
# Build train/test splits from the same JSON file using two complementary
# batch-index filters (TrainingDataGenerator is defined elsewhere).
train_data_generator = TrainingDataGenerator("Training_data.json", filter_funct=batch_index_filter_1)
test_data_generator = TrainingDataGenerator("Training_data.json", filter_funct=batch_index_filter_2)
X_train, y_train = train_data_generator.get_data()
X_test, y_test = test_data_generator.get_data()

# defining a pipeline for calibration purposes
cal_pipeline = create_extraction_pipeline(variance=0.99, n_avgs=2)
cal_pipeline.fit(X_test)

# preprocessing the pulled data
extraction_pipeline = create_extraction_pipeline(variance=0.99, n_avgs=2)
train_extracted_features = extraction_pipeline.fit_transform(X_train)

# Splice the calibration scaler's leading means into the training scaler so
# the shared features are centred the same way for the test transform.
# NOTE(review): assumes the first cal_mean.shape[0] features line up between
# the two pipelines — confirm against create_extraction_pipeline.
cal_mean = cal_pipeline.get_params()['std_scaler'].mean_
cur_mean = extraction_pipeline.get_params()['std_scaler'].mean_
new_mean = np.append(cal_mean, cur_mean[cal_mean.shape[0]:])
extraction_pipeline.get_params()['std_scaler'].mean_ = new_mean
test_extracted_features = extraction_pipeline.transform(X_test)

# defining the classifier and getting predictions
poly_clf = svm.SVC(kernel="poly", degree=3, C=1000)
poly_clf.fit(train_extracted_features, y_train)
y_pred = poly_clf.predict(test_extracted_features)

# generating a confusion matrix
print("Accuracy score : ", accuracy_score(y_test, y_pred))
movement_labels = train_data_generator.get_movement_labels()
generate_confusion_matrix(y_pred, y_test, movement_labels)
# Python 2 script: train a degree-5 polynomial SVM on libsvm-format data and
# write the integer predictions to a file, one per line.
from sklearn import svm
from sklearn.datasets import load_svmlight_files

X_train, y_train, X_test, y_test = load_svmlight_files(
    ('data/ml14fall_train.dat', 'data/ml14fall_test1_no_answer.dat'))
print "read data finished"

# NOTE(review): only the first 50 samples are used for both fit and predict —
# presumably a quick smoke test; confirm before a real run.
poly_clf = svm.SVC(kernel='poly', degree=5)
poly_clf = poly_clf.fit(X_train[:50], y_train[:50])
print "fit model finished"
prediction = poly_clf.predict(X_test[:50])
print prediction


def write_result(pred, result_path):
    # Write one integer prediction per line to result_path.
    result_content = '\n'.join([str(int(p)) for p in pred])
    with open(result_path, 'w') as result:
        result.write(result_content)
    print "result has saved into %s" % result_path


write_result(prediction, 'poly_5_path')
#Create list of which pulses were classified incorrectly inc_ind = [i for i in range(len(y_test)) if y_test[i]!=pred_comb[i]] incorrect = [(y_test[i],pred_comb[i],clf.pred[1][i],prob[1][i][1],clf.pred[2][i],prob[2][i][1],clf.pred[3][i],prob[3][i][1],clf.pred[4][i],prob[4][i][1]) for i in inc_ind] #Print diagnostics print(classification_report(y_test, pred_comb,digits=5)) #Other classifiers I tried: #Naive Bayes classifier; calculate score clf_nb = GaussianNB() score_nb = cross_validation.cross_val_score(clf_nb,X,y,cv = cv) score_nb_mean = score_nb.mean() score_nb_std = score_nb.std() #SVM classifier; calculate score clf_svm = svm.SVC() score_svm = cross_validation.cross_val_score(clf_svm,X,y,cv = cv) score_svm_mean = score_svm.mean() score_svm_std = score_svm.std() #Random Forest classifier; calculate score, predicted labels, confusion matrix clf_rf = RandomForestClassifier() score_rf = cross_validation.cross_val_score(clf_rf,X,y,cv = cv) score_rf_mean = score_rf.mean() score_rf_std = score_rf.std() clf_rf.fit(X_train,y_train) pred_rf = clf_rf.predict(X_test) cm_rf = confusion_matrix(y_test,pred_rf) #Combined classifier with random forest base; calculate score, predicted labels, confusion matrix cwbrf_list = [RandomForestClassifier(),
print(X.shape)
# Common FPR grid for interpolating per-fold ROC curves.
mean_fpr = np.linspace(0, 1, 100)
tprs = []
aucs = []
k = 0
# Cross-validation loop: fit an RBF SVM per fold and accumulate ROC/AUC stats.
for train, validation in kfold.split(X, Y):
    # clf = RandomForestClassifier()
    # clf.fit(X[train], Y[train])
    # clf = XGBClassifier()
    # clf.fit(X[train], Y[train])
    clf = svm.SVC(kernel='rbf', probability=True)  # probability=True enables predict_proba
    clf.fit(X[train], Y[train])
    # y_score = clf.predict(X_test, 1)
    # Y_pred = clf.predict_proba(X)[:, 1]
    # clf = LogisticRegressionCV(cv=5, penalty='l2', tol=0.0001, fit_intercept=True, intercept_scaling=1,
    #                            class_weight=None, random_state=None,
    #                            max_iter=100, verbose=0, n_jobs=None).fit(X[train], Y[train])
    y_score = clf.predict_proba(X[validation])[:, 1]
    # evaluate this fold
    fpr, tpr, threshold = roc_curve(Y[validation], y_score, pos_label=1)  # compute TPR and FPR
    roc_auc = auc(fpr, tpr)  # area under the ROC curve
    aucs.append(roc_auc)
# Load training features/labels from CSV: column 0 is the 1-based class label,
# the remaining columns are float features.
with open('/Users/anshulramachandran/Desktop/all_train.csv', newline='') as csvfile:
    filereader = csv.reader(csvfile, delimiter=',')
    for row in filereader:
        trainX.append([float(val) for val in row[1:]])
        trainY.append(int(row[0]) - 1)  # shift labels to 0-based
with open('/Users/anshulramachandran/Desktop/all_validation.csv', newline='') as csvfile:
    filereader = csv.reader(csvfile, delimiter=',')
    for row in filereader:
        testX.append([float(val) for val in row[1:]])
        testY.append(int(row[0]) - 1)

print('start')
clf = svm.SVC(gamma=0.001, C=100.)
clf.fit(trainX, trainY)
train_acc = clf.score(trainX, trainY)
test_acc = clf.score(testX, testY)
print(train_acc, test_acc)

validation_predictions = clf.predict(testX)

# Generate confusion matrix
# NOTE(review): assumes exactly 200 distinct classes — confirm label range.
confusion_matrix = np.zeros(shape=(200, 200))
for i in range(len(testY)):
    class_true = testY[i]
    class_pred = validation_predictions[i]
    confusion_matrix[class_true][class_pred] += 1
# ERREUR erreur.append(metrics.zero_one_loss(ytest, clf_ANN.predict(xtest))) # PRECISION score = clf_ANN.score(xtest, ytest) precision.append(score) print(" ANN précision : ", score) # TEMPS t = time.process_time() - begin temps.append(t) print("Temps écoulé ANN : ", t) cm = confusion_matrix(ytest, clf_ANN.predict(xtest)) print("Matrice de confusion:\n", cm, "\n") # SVM begin = time.process_time() clf_SVM = svm.SVC(kernel='poly', C=0.6) clf_SVM.fit(xtrain, ytrain) # ERREUR erreur.append(metrics.zero_one_loss(ytest, clf_SVM.predict(xtest))) # PRECISION score = clf_SVM.score(xtest, ytest) precision.append(score) print(" SVM précision : ", score) # TEMPS t = time.process_time() - begin temps.append(t) print("Temps écoulé SVM : ", t, "\n") cm = confusion_matrix(ytest, clf_SVM.predict(xtest)) print("Matrice de confusion:\n", cm, "\n") algo = ['KNN', 'ANN', 'SVM']
# kernel function def gaussian_kernel(x1, x2, sigma): return np.exp(- np.power(x1 - x2, 2).sum() / (2 * (sigma ** 2))) # 2.2 Data Preprocess---------------------------------------------------------------------------- # 2.2.1 load data mat = sio.loadmat('ex6data2.mat') data = pd.DataFrame(mat.get('X'), columns=['X1', 'X2']) data['y'] = mat.get('y') # 2.2.2 plot sns.set(context="notebook", style="white", palette=sns.diverging_palette(240, 10, n=2)) sns.lmplot('X1', 'X2', hue='y', data=data, size=5, fit_reg=False, scatter_kws={"s": 10} ) # plt.show() # 2.3 SVM ---------------------------------------------------------------------------------------------- svc = svm.SVC(C=100, kernel='rbf', gamma=10, probability=True) # non-linear SVM svc.fit(data[['X1', 'X2']], data['y']) svc.score(data[['X1', 'X2']], data['y']) # mean accuracy predict_prob = svc.predict_proba(data[['X1', 'X2']])[:, 1] # predict_proba return ndarray (data size, class) # use [:, 1] or [:, 0] to define the type we want classify out fig, ax = plt.subplots(figsize=(8, 6)) ax.scatter(data['X1'], data['X2'], s=30, c=predict_prob, cmap='Reds') # c means the type want to classify out plt.show()
def fit_with_kernel(inFeatures, inLabels, inKernel="rbf", maxIter=-1):
    """Train an SVC on the given features/labels and return the fitted model.

    inKernel selects the SVM kernel; maxIter caps solver iterations
    (-1 means no limit).
    """
    classifier = svm.SVC(kernel=inKernel, max_iter=maxIter)
    classifier.fit(inFeatures, inLabels)
    return classifier
def Train_And_Test_Image_Classifier(split):
    """Train an SVM to tell phone images ("yes*jpg") from non-phone images
    ("no*jpg") in the current directory and print test diagnostics.

    split selects which third of each image set is held out for testing:
    0 -> last third, 1 -> first third, anything else -> middle third.
    The fitted classifier is pickled to cellphone_image_classifier.sav.
    """
    # Load every phone image as a [grayscale, prewitt-edge] feature pair.
    phone_images = []
    for image_file in [
            img_f for img_f in os.listdir(".")
            if img_f.startswith("yes") and img_f.endswith("jpg")
    ]:
        image = imageio.imread(image_file)
        image = img_as_float(image)
        image = rgb2gray(image)
        image_prewitt = prewitt(image)
        phone_images.append([image, image_prewitt])
    n_phone_images = len(phone_images)

    # split phone images into training and testing sets
    factor_pi = int(n_phone_images / 3)
    training_phone_images = []
    testing_phone_images = []
    if split == 0:
        ## The last 1/3
        training_phone_images = phone_images[:factor_pi * 2]
        testing_phone_images = phone_images[factor_pi * 2:]
    elif split == 1:
        ## The first 1/3
        training_phone_images = phone_images[factor_pi:]
        testing_phone_images = phone_images[:factor_pi]
    else:
        ## The middle 1/3
        training_phone_images = phone_images[:factor_pi] + phone_images[factor_pi * 2:]
        testing_phone_images = phone_images[factor_pi:factor_pi * 2]

    # Same loading + split for the non-phone images.
    non_phone_images = []
    for image_file in [
            img_f for img_f in os.listdir(".")
            if img_f.startswith("no") and img_f.endswith("jpg")
    ]:
        image = imageio.imread(image_file)
        image = img_as_float(image)
        image = rgb2gray(image)
        image_prewitt = prewitt(image)
        non_phone_images.append([image, image_prewitt])
    n_non_phone_images = len(non_phone_images)

    # split none phone images into training and testing sets
    factor_npi = int(n_non_phone_images / 3)
    training_non_phone_images = []
    testing_non_phone_images = []
    if split == 0:
        ## The last 1/3
        training_non_phone_images = non_phone_images[:factor_npi * 2]
        testing_non_phone_images = non_phone_images[factor_npi * 2:]
    elif split == 1:
        ## The first 1/3
        training_non_phone_images = non_phone_images[factor_npi:]
        testing_non_phone_images = non_phone_images[:factor_npi]
    else:
        ## The middle 1/3
        training_non_phone_images = non_phone_images[:factor_npi] + non_phone_images[factor_npi * 2:]
        testing_non_phone_images = non_phone_images[factor_npi:factor_npi * 2]

    # Labels: 1 = phone, 0 = non-phone.
    training_set = training_phone_images + training_non_phone_images
    training_set_output = [1] * len(training_phone_images) + [0] * len(
        training_non_phone_images)
    testing_set = testing_phone_images + testing_non_phone_images
    testing_set_output = [1] * len(testing_phone_images) + [0] * len(
        testing_non_phone_images)

    # Flatten each [image, edges] pair into a single feature row.
    n_training_set = len(training_set)
    training_set = np.array(training_set)
    training_set = training_set.reshape(n_training_set, -1)
    n_testing_set = len(testing_set)
    testing_set = np.array(testing_set)
    testing_set = testing_set.reshape(n_testing_set, -1)

    classifier = svm.SVC(C=100, probability=True, random_state=0)
    classifier.fit(training_set, training_set_output)
    pickle.dump(classifier, open("cellphone_image_classifier.sav", "wb"))

    predicted = classifier.predict(testing_set)
    print(classifier.score(testing_set, testing_set_output))
    # Report each misclassified test index.
    i = 0
    while i < len(testing_set_output):
        if predicted[i] != testing_set_output[i]:
            print(i, predicted[i], testing_set_output[i])
        i += 1
    print("Classification report for classifier %s:\n%s\n" %
          (classifier, metrics.classification_report(testing_set_output, predicted)))

    # Cross-check the hard predictions against the class probabilities.
    predict_prob = classifier.predict_proba(testing_set)
    i = 0
    while i < len(testing_set_output):
        if predict_prob[i][0] > predict_prob[i][1]:
            if testing_set_output[i] != 0:
                print(i, "0: ", predict_prob[i][0], "1: ", predict_prob[i][1], " shouldbe:1")
        else:
            if testing_set_output[i] != 1:
                print(i, "0: ", predict_prob[i][0], "1: ", predict_prob[i][1], " shouldbe:0")
        i += 1
    print(" ")
# Fit an SVM on the 2-D PCA projection of iris and draw its decision
# boundaries over the projected points.
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
import numpy as np
from sklearn import svm
import matplotlib.pyplot as plt

iris = load_iris()
pca = PCA(n_components=2)
data = pca.fit(iris.data).transform(iris.data)
print(data.shape)

# Pad the plotting window one unit beyond the data range.
datamax = data.max(axis=0) + 1
datamin = data.min(axis=0) - 1
print(datamax)
print(datamin)

n = 2000  # mesh resolution per axis
X, Y = np.meshgrid(np.linspace(datamin[0], datamax[0], n),
                   np.linspace(datamin[1], datamax[1], n))
svc = svm.SVC()
svc.fit(data, iris.target)
Z = svc.predict(np.c_[X.ravel(), Y.ravel()])
print(np.unique(Z))
# Contour levels 0 and 1 fall between the three predicted class values.
plt.contour(X, Y, Z.reshape(X.shape), levels=[0, 1], colors=['r', 'g'])
# plt.show()
for i, c in zip([0, 1, 2], ['r', 'g', 'b']):
    d = data[iris.target == i]
    plt.scatter(d[:, 0], d[:, 1], c=c)
plt.show()
# replace all missing values(?) with -99999 df.replace('?', -99999, inplace=True) # drop id column since it is not a useful feature df.drop(['id'], 1, inplace=True) #input data X = np.array(df.drop(['class'], 1)) # output data y = np.array(df['class']) # Split input data to training and test data X_train, X_test, y_train, y_test = cross_validation.train_test_split( X, y, test_size=0.2) # initialize k means classifier clf = svm.SVC() # train the classifier clf.fit(X_train, y_train) # find the accuracy accuracy = clf.score(X_test, y_test) print(accuracy) # unknown sample for prediction example_measures = np.array([[4, 2, 1, 1, 1, 2, 3, 2, 1], [4, 2, 1, 1, 1, 2, 3, 2, 1]]) # avoid deprecation errors, len defines the number of samples example_measures = example_measures.reshape(len(example_measures), -1) # prediction
""" This module sets up different machines for machine learing algorithms based on the needs for the user. Set up as a library to import with mirrored commands for each machine. """ #This is the Support Vector Machine Section from sklearn import svm SVM = svm.SVC() def SVMfit(x, y): """ Input: x, y x (Array): An array of training points for the svm to set up an algorithm. y (Array): An array of values for their corresponding training point. Returns: NA Description: Sets the svm with an algorithm to predict the input data values. """ SVM.fit(x, y) def SVMpredict(x): """ Input: x x (Array): An array of a data point to predict the value of. Returns: The predicted value of the data point. Description: Uses a svm to predict the value of the input data point. """ return SVM.predict(x)
#模型选取的特征 select_feature_list = ['call_count_per_day', 'phone_loan_times_per_platform',\ 'idcard_loan_platform_num', 'idcard_loan_times_per_platform',\ 'call_count', 'sustained_days', 'gender'] #创建学习模型 rf = RF(n_estimators = 40) ada_tree = Ada(n_estimators = 40) lr = LR() nb1 = MultinomialNB() nb2 = GaussianNB() s_v_m = svm.SVC(C = 1) ada_lr = Ada(base_estimator = LR(),n_estimators = 40,algorithm='SAMME') ada_nb2 = Ada(base_estimator = GaussianNB(),n_estimators = 40,algorithm='SAMME') ada_svm = Ada(base_estimator = svm.SVC(),n_estimators = 40,algorithm='SAMME') #返回模型准确率 def model_estimate(clf, X, y, select_feature_list): result = [] for i in range(10): num = int(len(y)*0.7) random_index = np.random.permutation(len(y)) build_index = random_index[:num] test_index = random_index[num:] X_build = X.iloc[build_index].copy() y_build = y.iloc[build_index]
# Next-period US HPI and a binary label derived from its direction.
housing_pct['US_HPI_future'] = housing_pct['United States'].shift(-1)
housing_pct.dropna(inplace=True)
housing_pct['label'] = list(map(create_labels,
                                housing_pct['United States'],
                                housing_pct['US_HPI_future']))
# housing_pct['ma_apply_example'] = housing_pct['M30'].rolling(window=10).apply(moving_average)
# print(housing_pct.tail())

X = np.array(housing_pct.drop(['label', 'US_HPI_future'], 1))
y = np.array(housing_pct['label'])
X = scale(X)
X_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# clf = svm.SVC(kernel='linear')
# clflog = LogisticRegression(C=50.0, dual=False, penalty="l1")

# Average accuracy over 10 refits on the same split.
# NOTE(review): the split is fixed, so only model randomness varies per run.
clflog_accuracy = []
clfsvm_accuracy = []
for i in range(10):
    clflog = LogisticRegression(C=49.0, dual=False, penalty="l1")
    clflog.fit(X_train, y_train)
    clflog_accuracy.append(clflog.score(x_test, y_test))
    clfsvm = svm.SVC(kernel='linear')
    clfsvm.fit(X_train, y_train)
    clfsvm_accuracy.append(clfsvm.score(x_test, y_test))

print('Accuracy of logistic regression = %0.4f' % (mean(clflog_accuracy) * 100))
print('Accuracy of support vector machine = %0.4f' % (mean(clfsvm_accuracy) * 100))
X = hf['x'][:] Y = hf['y'][:] return X, Y x,y = load_h5py('Data/data_3.h5') print y max=-1 maxindex=-1 res=np.zeros(x.shape[0],np.amin(y)-np.amax(y)+1) for i in range(np.amin(y),np.amax(y)+1): print i y_train=np.zeros(y.shape) for j in range(0,y.shape[0]): if(y[j]==i): y_train[j]=1 else: y_train[j]=0 # print y_train model=svm.SVC(kernel='linear') model.fit(x,y_train) m=model.coef_ res.append(np.dot(x,m.T)) print res[0] for i in range(0,x.shape[0]):
# NOTE(review): fragment of a per-image training loop; n, shortIndex, longIndex,
# samples, labels, sets, test_index and the hr/cv helpers come from earlier in
# the file.
for finger in fingerPos:
    # Track the indices of the extreme-x fingertips.
    if finger[0] < fingerPos[shortIndex][0]:
        shortIndex = n
    if finger[0] > fingerPos[longIndex][0]:
        longIndex = n
    n += 1
longestDist = hr.dist(fingerPos[shortIndex][0], fingerPos[longIndex][0],
                      fingerPos[shortIndex][1], fingerPos[longIndex][1])
sampleData = [numFingers, fingerSum, longestDist]
samples.append(sampleData)
labels.append(label_array[image])

# make svm: standardize the features, then fit
scaler = preprocessing.StandardScaler().fit(samples)
model = svm.SVC()
model.fit(scaler.transform(samples), labels)

# run test on reserved subset
for image in sets[test_index]:
    im = cv.imread('image_' + str(image) + '.jpeg')
    segIm = hr.segment(im)
    palmPos, fingerPos, fingerLen = hr.extract(im, segIm)
    # Normalized FingerLength Feature
    numFingers = (len(fingerPos) - scaler.mean_[0]) / scaler.scale_[0]
    # Normalized FingerSum Feature
    fingerSum = 0
    for length in fingerLen:
        fingerSum = fingerSum + length
# NOTE(review): fragment — the `grid` dict (its tail is visible here) is passed
# positionally to XGBClassifier, which takes keyword hyperparameters; confirm
# the settings are actually applied (XGBClassifier(**grid, ...) may be intended).
    'random_state': 42,
    'gamma': 0
}
model = XGBClassifier(grid, weights=class_weights)
model.fit(X_train, Y_train)

from sklearn.metrics import accuracy_score
y_pred = model.predict(X_test)
# evaluate predictions
accuracy = accuracy_score(Y_test, y_pred)
print("XGBClassifier Accuracy: %.2f%%" % (accuracy * 100.0))
toc = time.perf_counter()
print("XGBClassifier runtime: %.3f seconds" % (toc - tic))

# SVM with class weighting to counter class imbalance
tic = time.perf_counter()
model = svm.SVC(class_weight='balanced')
model.fit(X_train, Y_train)
y_pred = model.predict(X_test)
# evaluate predictions
accuracy = accuracy_score(Y_test, y_pred)
print("SVM Accuracy: %.2f%%" % (accuracy * 100.0))
toc = time.perf_counter()
print("SVM runtime: %.3f seconds" % (toc - tic))

# k-nearest-neighbours baseline
from sklearn.neighbors import KNeighborsClassifier
tic = time.perf_counter()
knn = KNeighborsClassifier(n_neighbors=60)
knn.fit(X_train, Y_train)
y_pred = knn.predict(X_test)
accuracy = accuracy_score(Y_test, y_pred)
def __init__(self, class_num, x_train_raw, y_train_raw):
    """Vectorize the raw training sentences with TF-IDF and fit a
    one-vs-rest SVM on them."""
    self.tfidf = TfidfVectorizor(x_train_raw)
    vectors = []
    for sentence in x_train_raw:
        vectors.append(self.tfidf.process(sentence))
    self.svm = svm.SVC(decision_function_shape="ovr")
    self.svm.fit(vectors, y_train_raw)
# # # 1.2) Feature Extraction (Textual Features)
# # The terms' weights were calculated using the Term Frequency - Inverse Document Frequency (TF-IDF)
tfidf_vect = TfidfVectorizer(analyzer='word', token_pattern=r'\w{1,}', max_features=50000)
tfidf_vect.fit(x_text)
x_text_tfidf = tfidf_vect.transform(x_text)

# 1.3) Feature Selection (Textual Features)
# Feature selection using a chi-square score was applied for each applied machine learning algorithm to select relevant textual features.
# COMMENT OUT following code block for experimenting different feature sizes for each classifier
# FIX: the original read `clf = clf = svm.SVC()` — a duplicated assignment.
clf = svm.SVC()
for x in range(5, 23, 15):  # evaluates k = 5 and k = 20
    test = SelectKBest(score_func=chi2, k=x)
    fit = test.fit(x_sm, y)
    x_s = fit.transform(x_sm)
    scores = cross_val_score(clf, x_s, y, cv=10)
    # print(scores)

# Keep k=15 structured features for the final run.
test = SelectKBest(score_func=chi2, k=15)
fit = test.fit(x_sm, y)
x_s = fit.transform(x_sm)

# Sweep the number of selected textual (TF-IDF) features.
clf = svm.SVC()
for x in range(500, 4000, 500):
    test = SelectKBest(score_func=chi2, k=x)
    fit = test.fit(x_text_tfidf, y)
    x_t = fit.transform(x_text_tfidf)
# 4-fold cross-validation of a linear SVC on the diabetes dataset.
import numpy as np
from sklearn import cross_validation
from sklearn import datasets
from sklearn import svm

diabets = datasets.load_diabetes()
X_train, X_test, y_train, y_test = \
    cross_validation.train_test_split(
        diabets.data, diabets.target, test_size=0.2, random_state=0)
print (X_train.shape, y_train.shape)  # test size 20%
print (X_test.shape, y_test.shape)

clf = svm.SVC(kernel='linear', C=1)
scores = cross_validation.cross_val_score(
    clf, diabets.data, diabets.target, cv=4)  # 4-folds
print (scores)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std()))
# NOTE: accuracy is very poor — load_diabetes is a regression target, so
# exact-match classification accuracy on it is close to meaningless.
#Ağdaki(mesh) adım boyutu h = .02 y_30 = np.copy(y) y_30[rand.rand(len(y)) < 0.3] = -1 y_50 = np.copy(y) y_50[rand.rand(len(y)) < 0.8] = -1 #SVM (not scaled cuz we want to plot the support vectors) ls30 = (label_propagation.LabelSpreading().fit(X, y_30), y_30) ls50 = (label_propagation.LabelSpreading().fit(X, y_50), y_50) ls100 = (label_propagation.LabelSpreading().fit(X, y), y) rbf_svc = (svm.SVC(kernel='rbf', gamma = 0.5).fit(X, y), y) #Create mesh to plot in x_min, x_max = X[:,0].min() - 1, X[:, 0].max() + 1 y_min, y_max = X[:,1].min() - 1, X[:, 1].max() + 1 xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h)) #title for plots titles = ['LS %30 data', 'LS %50 data', 'LS %100 data', 'SVC with RBF'] color_map = {-1: (1, 1, 1), 0:(0,0,0.9), 1: (1,0,0), 2: (0.8, 0.6, 0)} for i, (clf, y_train) in enumerate((ls30, ls50, ls100, rbf_svc)):
# NOTE(review): fragment — `training`, pathway, fold_num, drop_cols, kernel and
# class_weight are defined earlier in the file.
testing = pd.read_csv('../output/fit_prediction_{}_{}_of_2'.format(
    pathway, fold_num), delimiter='\t')
if drop_cols:
    # Drop the requested columns from both splits and verify the counts.
    ncols = training.shape[1]
    training.drop(drop_cols, axis=1, inplace=True)
    assert training.columns.shape[0] == ncols - len(drop_cols)
    ncols = testing.shape[1]
    testing.drop(drop_cols, axis=1, inplace=True)
    assert testing.columns.shape[0] == ncols - len(drop_cols)

# fit SVM on interactome_train
model = svm.SVC(kernel=kernel, probability=True, class_weight=class_weight)
# NOTE(review): DataFrame.ix is removed in modern pandas — .loc with the same
# boolean column mask is the replacement.
train_on = training.ix[:, ~training.columns.isin(['name', 'class'])]
test_on = testing.ix[:, ~testing.columns.isin(['name', 'class'])]
model.fit(train_on, training['class'])

# save prediction as confidence, not as class
predicted_probab = model.predict_proba(test_on)

# reformat name such that it is
# tail head score
predicted_probab_df = pd.DataFrame(predicted_probab)
predicted_df = testing['name'].\
    str.split('_to_', expand=True)
# Show the first four training digits with their labels (Python 2 script).
for index, (image, label) in enumerate(images_and_labels[:4]):
    plt.subplot(2, 4, index + 1)
    plt.axis('off')
    plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')
    plt.title('Training: %i' % label)

# To apply a classifier on this data, we need to flatten the image, to
# turn the data in a (samples, feature) matrix:
print digits.images
n_samples = len(digits.images)
data = digits.images.reshape((n_samples, -1))
print data
digits.images

# Create a classifier: a support vector classifier
classifier = svm.SVC(gamma=0.001)

# We learn the digits on the first half of the digits
# (Python 2: n_samples / 2 is integer division, so the slice index is valid)
classifier.fit(data[:n_samples / 2], digits.target[:n_samples / 2])

# persistence model: pickle in memory and to disk
s = pickle.dumps(classifier)
joblib.dump(classifier, 'perPredict.pkl')

# Now predict the value of the digit on the second half:
expected = digits.target[n_samples / 2:]
predicted = classifier.predict(data[n_samples / 2:])

print("Classification report for classifier %s:\n%s\n"
      % (classifier, metrics.classification_report(expected, predicted)))
print("Confusion matrix:\n%s" % metrics.confusion_matrix(expected, predicted))
# Binarize the output y = label_binarize(y, classes=[0, 1, 2]) n_classes = y.shape[1] # Add noisy features to make the problem harder random_state = np.random.RandomState(0) n_samples, n_features = X.shape X = np.c_[X, random_state.randn(n_samples, 200 * n_features)] # shuffle and split training and test sets X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.5, random_state=0) # Learn to predict each class against the other classifier = OneVsRestClassifier(svm.SVC(kernel='linear', probability=True, random_state=random_state)) y_score = classifier.fit(X_train, y_train).decision_function(X_test) # Compute ROC curve and ROC area for each class fpr = dict() tpr = dict() roc_auc = dict() for i in range(n_classes): fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i]) roc_auc[i] = auc(fpr[i], tpr[i]) # Compute micro-average ROC curve and ROC area fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel()) roc_auc["micro"] = auc(fpr["micro"], tpr["micro"]) plt.figure()
sys.path.append("../tools/") from email_preprocess import preprocess from sklearn import svm ### features_train and features_test are the features for the training ### and testing datasets, respectively ### labels_train and labels_test are the corresponding item labels features_train, features_test, labels_train, labels_test = preprocess() ######################################################### ### your code goes here ### #features_train = features_train[:len(features_train)/100] #labels_train = labels_train[:len(labels_train)/100] clf = svm.SVC(kernel='rbf', C=10000) t0 = time() clf.fit(features_train, labels_train) print "training time:", round(time() - t0, 3), "s" t0 = time() print "Score of naive bayes algorithm:", clf.score(features_test, labels_test) print "Score time:", round(time() - t0, 3), "s" pred = clf.predict(features_test) n = 0 for m in pred: if m == 1: n += 1
# NOTE(review): fragment — the audio ("proso") features are PCA-reduced here and
# concatenated with the video features prepared earlier in the file.
pca_a = PCA(n_components=reducedDim_a)
pca_a.fit(training_data_proso)
# Transform training_data and testing data respectively
training_data_proso_transformed = pca_a.transform(training_data_proso)
testing_data_proso_tansformed = pca_a.transform(testing_data_proso)

# Concatenate video training_data and audio training_data into 'combined_trainingData'
sample_train = np.concatenate(
    (training_data_transformed, training_data_proso_transformed), axis=1)
# Concatenate video testing_data and audio testing_data into 'combined_testingData'
sample_test = np.concatenate(
    (testing_data_transformed, testing_data_proso_tansformed), axis=1)

# Train SVM classifier
clf = svm.SVC(kernel='linear')
clf.fit(sample_train, training_class)

# The prediction results of training data and testing data respectively
pred_train = clf.predict(sample_train)
pred_test = clf.predict(sample_test)

# Calculate and Print the training accuracy and testing accuracy.
print('training accuracy: {}'.format(
    accuracy_score(training_class, pred_train, normalize=True)))
print('testing accuracy: {}'.format(
    accuracy_score(testing_class, pred_test, normalize=True)))
print('confusion matrix training:\n{}'.format(
    confusion_matrix(training_class, pred_train)))
print('confusion matrix testing:\n{}'.format(