def classification_within_modality(dataFrame, categoria, exposure):
    """Per-subject leave-one-out classification within one modality.

    For every subject (``people`` group) in *dataFrame*: mean-center the
    features with the training-fold mean, reduce with PCA (99% explained
    variance), and classify the held-out trial with Gaussian Naive Bayes
    using uniform priors over the 6 classes.

    Parameters
    ----------
    dataFrame : pandas.DataFrame
        Must contain 'trial', 'group' and 'people' columns; every other
        column is treated as a feature.
    categoria, exposure :
        Labels copied verbatim into every result row.

    Returns
    -------
    list
        One ``[subject_id, y_pred, y_true, categoria, exposure]`` row per
        leave-one-out split.
    """
    dataFrame_result = []
    loo = LeaveOneOut()
    # LeaveOneOut yields one split per row, so n_splits over the whole
    # frame equals the sum of the per-group split counts below.
    pbar = tqdm(total=loo.get_n_splits(dataFrame))
    for ind, pearson in dataFrame.groupby('people'):
        X = pearson.drop(columns=['trial', 'group', 'people'])
        y = pearson['group']
        for train_index, test_index in loo.split(X):
            X_train, X_test = X.iloc[train_index], X.iloc[test_index]
            y_train, y_test = y.iloc[train_index], y.iloc[test_index]
            # Normalize: center BOTH folds with the training mean only,
            # so no test information leaks into the transform.
            train_mean = average(X_train, axis=0)
            X_train_without_mean = subtract(X_train, train_mean)
            X_test_without_mean = subtract(X_test, train_mean)
            # BUG FIX: the original assigned ``clf.class_prior_`` before
            # fit(); GaussianNB.fit() recomputes class_prior_ from the
            # data, silently discarding that assignment.  The ``priors``
            # constructor parameter is the supported way to fix them.
            clf = GaussianNB(priors=[1 / 6] * 6)
            pca = PCA(random_state=42, svd_solver='full',
                      n_components=0.99).fit(X_train_without_mean)
            X_train_pca = pca.transform(X_train_without_mean)
            X_test_pca = pca.transform(X_test_without_mean)
            clf = clf.fit(X_train_pca, y_train)
            y_pred = clf.predict(X_test_pca)
            dataFrame_result.append(
                [ind, y_pred, y_test.values, categoria, exposure])
            pbar.update(1)
    pbar.close()
    return dataFrame_result
def deserialize_gaussian_nb(model_dict):
    """Rebuild a fitted GaussianNB from its serialized dict form.

    Parameters
    ----------
    model_dict : dict
        Expected keys: 'params' (constructor kwargs), 'classes_',
        'class_count_', 'class_prior_', 'theta_', 'sigma_', 'epsilon_'.

    Returns
    -------
    GaussianNB
        An estimator with its fitted attributes restored, ready for
        ``predict`` without calling ``fit``.
    """
    # BUG FIX: the original called GaussianNB(model_dict['params']),
    # passing the whole params dict positionally as the ``priors``
    # argument.  Unpack it as keyword arguments instead.
    model = GaussianNB(**model_dict['params'])
    model.classes_ = np.array(model_dict['classes_'])
    model.class_count_ = np.array(model_dict['class_count_'])
    model.class_prior_ = np.array(model_dict['class_prior_'])
    model.theta_ = np.array(model_dict['theta_'])
    # NOTE(review): ``sigma_`` was renamed ``var_`` in scikit-learn 1.0;
    # this assumes the serializing and deserializing sides run a pre-1.0
    # sklearn — confirm the pinned version.
    model.sigma_ = np.array(model_dict['sigma_'])
    model.epsilon_ = model_dict['epsilon_']
    return model
def NB_predict():
    """Flask endpoint: rebuild a fitted GaussianNB from posted params
    and return predictions for the posted feature matrix ``X``.

    Reads two JSON form fields: 'X' (the samples to classify) and
    'params' (the fitted-attribute arrays).  Responds with
    ``{"pred": [...]}``.
    """
    samples = json.loads(request.form['X'])
    fitted = json.loads(request.form['params'])
    model = GaussianNB()
    # Restore the fitted state directly onto the fresh estimator; no
    # fit() call happens afterwards, so the attributes stick.
    for attr, key in (('class_prior_', 'class_prior'),
                      ('class_count_', 'class_count'),
                      ('theta_', 'theta'),
                      ('sigma_', 'sigma'),
                      ('classes_', 'classes')):
        setattr(model, attr, np.array(fitted[key]))
    return jsonify(pred=model.predict(samples).tolist())
def classification_across_modality(dataFrame1, dataFrame2, inp, exp):
    """Train on one modality, test on the other, subject by subject.

    Pairs each subject's data from *dataFrame1* (train) with the same
    subject's data from *dataFrame2* (test): mean-centers both with the
    training mean, projects through PCA (99% explained variance), and
    classifies with Gaussian Naive Bayes under uniform 6-class priors.

    Parameters
    ----------
    dataFrame1, dataFrame2 : pandas.DataFrame
        Must contain 'trial', 'group' and 'people' columns; remaining
        columns are features.  Groups are paired in groupby order —
        assumes both frames hold the same subjects; TODO confirm.
    inp, exp :
        Labels copied verbatim into every result row.

    Returns
    -------
    list
        One ``[subject_id, y_pred_i, y_true_i, inp, exp]`` row per test
        trial.
    """
    dataFrame_result = []
    for (ind_1, pearson_1), (ind_2, pearson_2) in zip(
            dataFrame1.groupby('people'), dataFrame2.groupby('people')):
        X_train = pearson_1.drop(columns=['trial', 'group', 'people'])
        y_train = pearson_1['group']
        X_test = pearson_2.drop(columns=['trial', 'group', 'people'])
        y_test = pearson_2['group']
        # Normalize: center both modalities with the TRAINING mean only.
        train_mean = average(X_train, axis=0)
        X_train_without_mean = subtract(X_train, train_mean)
        X_test_without_mean = subtract(X_test, train_mean)
        # BUG FIX: the original assigned clf.class_prior_ before fit();
        # GaussianNB.fit() recomputes class_prior_ from the data, so the
        # assignment was a no-op.  Use the ``priors`` parameter instead.
        clf = GaussianNB(priors=[1 / 6] * 6)
        pca = PCA(random_state=42, svd_solver='full',
                  n_components=0.99).fit(X_train_without_mean)
        X_train_pca = pca.transform(X_train_without_mean)
        X_test_pca = pca.transform(X_test_without_mean)
        clf = clf.fit(X_train_pca, y_train)
        y_pred = clf.predict(X_test_pca)
        for y_i_pred, y_i_test in zip(y_pred, y_test.values):
            dataFrame_result.append([ind_1, y_i_pred, y_i_test, inp, exp])
    return dataFrame_result
# coding: utf8
# scikit-learn ships three Naive Bayes classifiers: GaussianNB (Gaussian),
# MultinomialNB (multinomial) and BernoulliNB (Bernoulli).
import numpy as np
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB

X = np.asarray([[-1, -1], [-2, -2], [-3, -3], [-4, -4], [-5, -5],
                [1, 1], [2, 2], [3, 3]])
y = np.asarray([1, 1, 1, 1, 1, 2, 2, 2])

# BUG FIX: the original assigned clf.class_prior_ = [0.675, 0.325] before
# fit(); GaussianNB.fit() overwrites class_prior_ with the empirical class
# frequencies, so the assignment was silently ignored.  The ``priors``
# constructor parameter actually pins the priors through fit().
clf = GaussianNB(priors=[0.675, 0.325])
clf.fit(X, y)
print(clf.predict([[-1, -1], [2, 3]]))
# Assemble the (height, weight) feature matrix from the male/female
# samples and fit the Gaussian Naive Bayes on it.
tailles = np.concatenate((taille_h, taille_f))
poids = np.concatenate((poids_h, poids_f))
data = np.column_stack((tailles, poids))
gnb.fit(data, classes)


def _report(label):
    """Predict on the full data set, score it, and print the error rate
    under the classifier's current priors.  Returns (preds, p_err, err)."""
    preds = gnb.predict(data)
    pe, e = computeNaif(preds, classes)
    print("Prédiction " + label, gnb.class_prior_, "% d'erreurs :",
          pe * 100, "soit", e, "/", len(preds))
    return preds, pe, e


# MAP: priors as estimated by fit() from the class frequencies.
y_pred, p_err, err = _report("MAP")

# ML: hand-set priors.  Reassigning class_prior_ AFTER fit() is honored
# by GaussianNB at prediction time.
gnb.class_prior_ = [0.48, 0.52]
y_pred, p_err, err = _report("ML")

# Naif: uniform priors.
gnb.class_prior_ = [0.5, 0.5]
y_pred, p_err, err = _report("Naif")

#######
# Twice
#######
# Impute missing (-1) distance values; Imputer's default strategy fills
# them with the column mean.
# NOTE(review): alternative once considered:
#   train_feats[train_feats == -1] = sys.maxsize
imp = Imputer(-1)
train_feats = imp.fit_transform(train_feats)

print_stars()
print('Multinomial Naive Bayes')
mnb = MultinomialNB(class_prior=[0.9, 0.1])
_, _, _, mnb_probs = train_test_print(mnb, train_feats, test_feats,
                                      train_mask, test_mask)

print_stars()
print('Gaussian Naive Bayes')
# BUG FIX: the original set gnb.class_prior_ = [0.9, 0.1] after
# construction; GaussianNB.fit() (presumably called inside
# train_test_print — confirm) recomputes class_prior_ and discards that
# assignment.  The ``priors`` parameter pins the priors through fit(),
# matching the MultinomialNB(class_prior=...) usage above.
gnb = GaussianNB(priors=[0.9, 0.1])
_, _, _, gnb_probs = train_test_print(gnb, train_feats, test_feats,
                                      train_mask, test_mask)

print_stars()
print('Naive Bayes with Square Kernel')
gnb2 = GaussianNB(priors=[0.9, 0.1])  # same fix as above
test_prods = pairwise_products(test_feats)
train_prods = pairwise_products(train_feats)
_, _, _, gnb2_probs = train_test_print(gnb2, train_prods, test_prods,
                                       train_mask, test_mask)

print_stars()
print('K-Nearest Neighbors, k = 25')
knn = KNeighborsClassifier(n_neighbors=25, weights='distance')
print(f"Classification Accuracy: {accuracy}")

"""ROC CURVE 1"""
fpr, tpr, thresholds = metrics.roc_curve(testY, prediction)
roc_auc = metrics.auc(fpr, tpr)
print('ROC_AUC = ', roc_auc)
display = metrics.RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc,
                                  estimator_name=None)
display.plot()
plt.show()

"""CHANGING PRIORS TO 0.1 AND 0.9"""
# NOTE(review): reassigning class_prior_ on an already-fitted model only
# changes predict() for estimators that read class_prior_ at prediction
# time (e.g. GaussianNB) — confirm the concrete type of ``classifier``.
classifier.class_prior_ = [0.1, 0.9]
prediction = classifier.predict(testX)
prediction_train = classifier.predict(trainX)
# BUG FIX: classification_report expects (y_true, y_pred); the original
# passed the predictions first, which swaps precision and recall (and
# mislabels support) in the printed reports.
print(classification_report(trainY, prediction_train))
print(classification_report(testY, prediction))

"""CONFUSION MATRICIES"""
# Train accuracy
cm = confusion_matrix(trainY, prediction_train)
plt.figure(figsize=(5, 5))
ax = sns.heatmap(cm, annot=True, fmt="d")
plt.ylabel('True Values')
plt.xlabel('Predicted Values')
accuracy = accuracy_score(trainY, prediction_train)
# Impute missing (-1) distance values; Imputer's default strategy fills
# them with the column mean.
# NOTE(review): alternative once considered:
#   train_feats[train_feats == -1] = sys.maxsize
imp = Imputer(-1)
train_feats = imp.fit_transform(train_feats)

print_stars()
print('Multinomial Naive Bayes')
mnb = MultinomialNB(class_prior=[0.9, 0.1])
_, _, _, mnb_probs = train_test_print(
    mnb, train_feats, test_feats, train_mask, test_mask)

print_stars()
print('Gaussian Naive Bayes')
# BUG FIX: the original set gnb.class_prior_ = [0.9, 0.1] after
# construction; GaussianNB.fit() (presumably called inside
# train_test_print — confirm) recomputes class_prior_ and discards that
# assignment.  The ``priors`` parameter pins the priors through fit(),
# matching the MultinomialNB(class_prior=...) usage above.
gnb = GaussianNB(priors=[0.9, 0.1])
_, _, _, gnb_probs = train_test_print(
    gnb, train_feats, test_feats, train_mask, test_mask)

print_stars()
print('Naive Bayes with Square Kernel')
gnb2 = GaussianNB(priors=[0.9, 0.1])  # same fix as above
test_prods = pairwise_products(test_feats)
train_prods = pairwise_products(train_feats)
_, _, _, gnb2_probs = train_test_print(
    gnb2, train_prods, test_prods, train_mask, test_mask)

print_stars()
print('K-Nearest Neighbors, k = 25')
knn = KNeighborsClassifier(n_neighbors=25, weights='distance')