def train(self):
    """Train the n-gram Naive Bayes models that are switched on.

    When ``self.uni_gram`` is truthy, a model is built from
    ``self.train_tweets``, stored on ``self.nb_uni`` and trained.  When
    ``self.bi_gram`` is truthy, a second model is built with
    ``bi_gram=True``, stored on ``self.nb_bi`` and trained.  Returns None.
    """
    # Plain truthiness tests instead of comparing against True (PEP 8).
    if self.uni_gram:
        self.nb_uni = NaiveBayes.NaiveBayes(self.train_tweets)
        self.nb_uni.train()
    if self.bi_gram:
        self.nb_bi = NaiveBayes.NaiveBayes(self.train_tweets, bi_gram=True)
        self.nb_bi.train()
def main():
    """Train a language-identification model and label every test document.

    Command-line options (all required):
      -arg1  path of the training documents
      -arg2  path of the test documents
      -arg3  integer ``n`` forwarded to ``readDocuments``
      -arg4  float ``lamda`` forwarded to ``mostLikelyLanguage``

    One line ``<id>|<language>`` is printed per test document.
    """
    parser = argparse.ArgumentParser(description="Parse Values.")
    # An option string cannot be combined with a positional name in one
    # add_argument call (argparse raises ValueError); the attribute name
    # is supplied through ``dest`` instead.
    parser.add_argument('-arg1', dest='trainPath', type=str, required=True)
    parser.add_argument('-arg2', dest='testPath', type=str, required=True)
    parser.add_argument('-arg3', dest='n', type=int, required=True)
    parser.add_argument('-arg4', dest='lamda', type=float, required=True)
    args = parser.parse_args()

    trainPath = args.trainPath
    testPath = args.testPath
    n = args.n
    lamda = args.lamda

    nbModel = NaiveBayes()
    inout = io.IO()
    trainSet = inout.readDocuments(trainPath, n)
    testSet = inout.readDocuments(testPath, n)
    nbModel.train(trainSet)

    for doc in testSet:
        bestLanguage = nbModel.mostLikelyLanguage(doc.text, lamda)
        # The original concatenated the builtin ``id`` function, which
        # always raised TypeError.
        # NOTE(review): assumes each document exposes an ``id`` attribute
        # next to ``text`` -- confirm against io.IO.readDocuments.
        print(str(doc.id) + "|" + bestLanguage)
def main():
    """Train Naive Bayes on the review-polarity data and report accuracy.

    Loads the train/test frames with ``generate_df``, fits the classifier
    on the training ``text`` column against the ``category`` labels,
    predicts on the test frame, then prints a guess/actual/correct table
    followed by the mean of the ``correct`` column.
    """
    # get data
    train_df, test_df = generate_df("data/review_polarity/txt_sentoken")

    # separate the training data into text and labels
    train_labels = pd.DataFrame(train_df["category"])
    train_df = train_df["text"]

    # create and train the model
    nb = NaiveBayes.NaiveBayes()
    nb.fit(train_df, train_labels)

    # predict
    output = nb.predict(test_df)

    # check accuracy
    df = pd.DataFrame()
    df['guess'] = output['guess']
    df['actual'] = test_df['category']
    df['correct'] = df['guess'] == df['actual']

    # Parenthesised single-argument print works on Python 2 and Python 3;
    # the bare print statement is a syntax error on Python 3.
    print(df)
    print(np.mean(df['correct']))
def output_test_file(input_filename, output_filename):
    """Classify every row of a test file and write the predictions as CSV.

    Args:
        input_filename: test data file handed to ``load_data``.
        output_filename: destination CSV; receives a
            ``Survived, PassengerID`` header and one row per test record.

    The training data is loaded from ``train.csv`` and binned, but as
    written it is not passed to the classifier.
    """
    # Earlier experiment: KNN on class, gender, and ticket fare
    # KNN_classifier = KNN(5, [test_columns.Pclass,test_columns.Sex,test_columns.Fare])
    train_data = load_data('train.csv', 'train')
    bin_data(train_data)
    # Earlier experiment: decision tree
    # attributes = [ x for x,y in enumerate(att_values) if (y != 'skip' and x != 0)]
    # DecisionTreeClassifier = DecisionTree(train_data, attributes,'')
    NBClassifier = NaiveBayes([
        test_columns.PassengerId,
        test_columns.Sex,
        test_columns.Fare,
        test_columns.Pclass,
        test_columns.Age,
    ])
    test_data = load_data(input_filename, 'test')

    # The context manager closes the output file even if prediction fails;
    # the original never closed the handle.
    # NOTE(review): mode 'wb' is what the Python 2 csv module expects; on
    # Python 3 this would need mode 'w' with newline='' -- confirm target.
    with open("%s" % output_filename, 'wb') as output_file:
        output_file_object = csv.writer(output_file)
        output_file_object.writerow(["Survived", "PassengerID"])
        # Earlier experiment: manual encoding of the Sex column
        # for row in test_data:
        #     if row[test_columns.Sex] == 'female':
        #         row[test_columns.Sex] = 0.0
        #     else:
        #         row[test_columns.Sex] = 1.0
        bin_data(test_data)
        for row in test_data:
            survived = "1" if NBClassifier.predict(row) == 1 else "0"
            output_file_object.writerow([survived, row[0]])
def __init__(self, filename,classifier='NaiveBayes'):
    """Load a tab-separated corpus and build the vocabulary.

    Args:
        filename: path of a header-less, tab-delimited file; column 0
            holds the labels and column 1 the texts.
        classifier: accepted but not consulted here; a ``NB.NaiveBayes``
            instance is always created.
    """
    self.classifier = NB.NaiveBayes()
    self.filename = filename
    # quoting=3 is csv.QUOTE_NONE: quote characters are kept verbatim.
    frame = pd.read_csv(
        filename,
        header=None,
        delimiter="\t",
        quoting=3,
    )
    self.corpus, self.labels = frame[1], frame[0]
    self.build_vocab(self.corpus)
def tarea1(entrenamiento, prueba):
    """Plot the Naive Bayes and Bayes classifiers for one train/test pair.

    The training set is split with ``Main().split`` into ``(t_0, t_1)``;
    both classifiers are built from
    ``(entrenamiento, t_1, t_0, prueba)`` and asked to plot.  Returns None.
    """
    t_0, t_1 = Main().split(entrenamiento)
    NaiveBayes.NaiveBayes(entrenamiento, t_1, t_0, prueba).plot()
    Bayes.Bayes(entrenamiento, t_1, t_0, prueba).plot()
def testNaiveBayes():
    """Fit Naive Bayes on the iris file and print its mismatch ratio.

    The printed value is the number of rows whose last column differs
    from the prediction, divided by the total number of rows.
    """
    samples = np.mat(np.loadtxt(r"data\iris\iris.txt", delimiter=","))
    model = NaiveBayes(1)
    model.train(samples, np.mat([0] * 4))
    predicted = model.predict(samples)
    # Boolean row mask of the samples whose stored label != prediction.
    wrong_rows = (samples[:, -1] != predicted).A.flatten()
    print(samples[wrong_rows, :].shape[0] / samples.shape[0])
def test(self):
    """Check ``featprob`` against hand-computed values on artificial data."""

    def getfeatures(text):
        """Feature extractor used by the test: the unique words of *text*."""
        return list(set(text.split()))

    bayes = NaiveBayes.NaiveBayes(getfeatures)
    # Pre-populated (feature, class) counts; no training is run.
    bayes.feature_count = {
        ('terms,', 'C1'): 1, ('considers', 'C2'): 1,
        ('independently', 'C3'): 1, ('each', 'C1'): 1,
        ('that', 'C1'): 1, ('the', 'C3'): 1,
        ('on', 'C1'): 1, ('features', 'C1'): 1,
        ('and', 'C3'): 1, ('is', 'C2'): 1,
        ('feature.', 'C2'): 1, ('For', 'C2'): 1,
        ('fruit', 'C2'): 1, ('features,', 'C2'): 1,
        ('classifier', 'C2'): 1, ('(or', 'C2'): 2,
        ('these', 'C1'): 1, ('the', 'C2'): 2,
        ('particular', 'C2'): 1, ('may', 'C2'): 1,
        ('Bayes', 'C2'): 1, ('all', 'C2'): 1,
        ('feature', 'C2'): 1, ('apple', 'C3'): 1,
        ('naive', 'C2'): 1, ('depend', 'C1'): 1,
        ('other', 'C2'): 2, ('if', 'C3'): 1,
        ('contribute', 'C3'): 1, ('any', 'C2'): 1,
        ('these', 'C2'): 1, ('4"', 'C3'): 1,
        ('classifier', 'C1'): 1, ('other', 'C1'): 1,
        ('of', 'C1'): 1, ('assumes', 'C1'): 1,
        ('Bayes', 'C1'): 1, ('Even', 'C1'): 1,
        ('presence', 'C1'): 1, ('the', 'C1'): 2,
        ('a', 'C2'): 3, ('upon', 'C1'): 1,
        ('that', 'C3'): 1, ('example,', 'C2'): 1,
        ('properties', 'C3'): 1, ('this', 'C3'): 1,
        ('to', 'C2'): 1, ('In', 'C1'): 1,
        ('round,', 'C3'): 1, ('about', 'C3'): 1,
        ('absence)', 'C2'): 2, ('of', 'C2'): 3,
        ('diameter.', 'C3'): 1, ('existence', 'C1'): 1,
        ('be', 'C3'): 1, ('considered', 'C3'): 1,
        ('a', 'C1'): 1, ('it', 'C3'): 1,
        ('an', 'C3'): 1, ('or', 'C1'): 1,
        ('if', 'C1'): 1, ('presence', 'C2'): 1,
        ('is', 'C3'): 1, ('to', 'C3'): 2,
        ('unrelated', 'C2'): 1, ('red,', 'C3'): 1,
        ('probability', 'C3'): 1, ('naive', 'C1'): 1,
        ('class', 'C2'): 1, ('in', 'C3'): 1,
        ('simple', 'C1'): 1,
    }
    bayes.class_count = {'C1': 2, 'C2': 3, 'C3': 2}

    feat_cats = [
        ('of', 'C2'), ('to', 'C3'), ('features', 'C1'), ('Bayes', 'C1'),
        ('of', 'C1'), ('to', 'C5'), ('features', 'C3'), ('Bayes', 'C2'),
    ]
    probs = [0.0, 0.0, -0.6931, -0.6931, -0.6931, -1e+300, -7.6009, -1.0986]

    # Each (feature, class) pair is compared to 4 decimal places.
    for (feature, cat), expected in zip(feat_cats, probs):
        self.assertAlmostEqual(featprob(bayes, feature, cat), expected, 4)
def mainWIthAllFlower():
    """Cross-validate Naive Bayes on the full iris dataset.

    Runs a 10-fold ``CrossValidator`` and prints the mean accuracy.
    """
    iris = datasets.load_iris()
    dataset_with_target = concateTargetWithDataset(iris.data, iris.target)
    validator = CrossValidator.CrossValidator(
        algo=NaiveBayes.NaiveBayes(),
        dataset=dataset_with_target,
        nbFolds=10,
    )
    # score() yields (scores per fold, mean accuracy, ROC data).
    _, meanAccuracy, _ = validator.score()
    print('Accuracy: %.2f%%' % meanAccuracy)
def create_classifier():
    """Build and train a Naive Bayes classifier on the pos/neg corpora.

    The corpora are the ``pos`` and ``neg`` directories under
    ``BASE_DIR``.  Returns the trained classifier.
    """
    corpus_dirs = {
        label: os.path.join(BASE_DIR, label) for label in ("pos", "neg")
    }
    nbc = nb.NaiveBayes(
        positive_corpus=corpus_dirs["pos"],
        negative_corpus=corpus_dirs["neg"],
    )
    # train both categories
    nbc.train()
    return nbc
def test_model_nb(dataset):
    """Fit Naive Bayes on one dataset and print its test accuracy.

    Args:
        dataset: dataset identifier forwarded to ``testImport.read_data``.
    """
    loaded = testImport.read_data(dataset, 0)
    # Layout: continuous/categorical training features and labels,
    # followed by the same three parts for the test split.
    X_con, X_cat, Y, test_con, test_cat, test_y = loaded

    classifier = NaiveBayes.NaiveBayes()
    classifier.fit(X_con, X_cat, Y)
    predictions = classifier.predict(test_con, test_cat)
    print(evaluate_acc_NB(test_y, predictions))
def _init_classifiers(self):
    """Create every classifier and train those without a saved model.

    KNN and HMM are always trained on the default data set.  Each of the
    other classifiers is trained only when its pickle file under
    ``./Models`` cannot be opened (IOError).
    """
    # Initialize classifier objects
    self.fenc = FreemanEncoder()
    self.knn = KNN.KNN()
    self.HMM = HMM.HMM()
    self.NaiveBayes = NaiveBayes.NaiveBayes()
    self.RandomForest = RandomForest.RandomForests()
    self.SVM = svm.SVM_SVC()
    self.LogisticReg = LogisticReg.LogisticReg()
    self.AdaBoost = adaboost.AdaBoost()
    self.GBRT = gbrt.GBRT()

    # Train initially on the default data set, if no model is saved already.
    # KNN has no saved model, so it is always trained.
    self.knn.knn_train(CharRecognitionGUI_support.training_dataset, 1.0)
    self.HMM.training(CharRecognitionGUI_support.training_dataset)

    # (pickle path, classifier) pairs, in the original initialisation order.
    saved_models = (
        ("./Models/naivebayes_model.p", self.NaiveBayes),
        ("./Models/random_forest.p", self.RandomForest),
        ("./Models/svm.p", self.SVM),
        ("./Models/logistic_model.p", self.LogisticReg),
        ("./Models/AdaBoostClassifier.p", self.AdaBoost),
        ("./Models/GradientBoostingClassifier.p", self.GBRT),
    )
    for model_path, classifier in saved_models:
        try:
            # The context manager closes the handle; the original leaked
            # one open file per model.
            # NOTE(review): the unpickled object is discarded, as in the
            # original -- the load only probes that a readable model
            # exists.  pickle must only be used on trusted files.
            with open(model_path, "rb") as model_file:
                pickle.load(model_file)
        except IOError:
            classifier.training(CharRecognitionGUI_support.training_dataset)
def funcPCA():
    """Evaluate Naive Bayes on PCA-reduced data, then run the KNN sweep.

    The 1024-wide ``inp_data`` vectors of the train and test images are
    projected onto 512 principal components.  The projection is fitted on
    the training set only and then applied to both sets, so both live in
    the same feature space.  Naive Bayes figures are printed, followed by
    KNN results for k = 1..10.
    """
    nb = NaiveBayes.NaiveBayes()
    data = nb.convert(0)
    pca = PCA(n_components=512)
    print("\nNaive Byes after PCA to reduced data dimension to 512\n")

    # Fit the projection on the training images and project them.
    train_matrix = np.zeros([len(data.train), 1024])
    for i, image in enumerate(data.train):
        train_matrix[i] = image.inp_data
    train_matrix = pca.fit(train_matrix).transform(train_matrix)
    for i, row in enumerate(train_matrix):
        data.train[i].inp_data = row

    # Project the test images with the projection learned from the
    # training set.  The original re-fitted PCA on the test set, which
    # produces components unrelated to the ones the model is trained on.
    test_matrix = np.zeros([len(data.test), 1024])
    for i, image in enumerate(data.test):
        test_matrix[i] = image.inp_data
    test_matrix = pca.transform(test_matrix)
    for i, row in enumerate(test_matrix):
        data.test[i].inp_data = row

    print("\nCalculating the Likelihood and Prior\n")
    likelihood, prior = nb.train(data)

    train_accuracy = nb.classify(data, likelihood, prior)
    train_accuracy = float("{:.2f}".format(train_accuracy))
    print("\nThe training error rate is ::", train_accuracy, "%\n")

    test_accuracy = nb.test(data, likelihood, prior)
    test_accuracy = float("{:.2f}".format(test_accuracy))
    print("\nThe testing error rate is ::", test_accuracy, "%\n")

    print("\nKNN after PCA to reduced data dimension to 512\n")
    print("Please NOTE it will takes 15 minutes for KNN to run\n")
    knn = KNearestNeighbours.KNearestNeighbours()
    # NOTE(review): this reloads the data through knn.convert, so the PCA
    # projection applied above is not visibly carried over -- confirm.
    data = knn.convert(0)

    print("\nEvaluating the testing error in KNN using different k values\n")
    testErrors = []
    trainErrors = []
    for k in range(1, 11):
        testErrors.append(knn.classify(data, k))
        print("\nThe testing error rate for k = ", k, "is :", testErrors[-1], "\n")

    print("\nEvaluating the training error in KNN using different k values\n")
    for k in range(1, 11):
        trainErrors.append(knn.train(data, k))
        print("\nThe training error rate for k = ", k, "is :", trainErrors[-1], "\n")
def mainTestBrainCancer():
    """Cross-validate Naive Bayes on the breast-cancer dataset.

    Prints the mean accuracy of a 10-fold cross-validation and shows the
    ROC curve built from the validator's ROC data.
    """
    cancer = datasets.load_breast_cancer()
    dataset_with_target = concateTargetWithDataset(cancer.data, cancer.target)
    validator = CrossValidator.CrossValidator(
        algo=NaiveBayes.NaiveBayes(),
        dataset=dataset_with_target,
        nbFolds=10,
    )
    _, meanAccuracy, rocData = validator.score()
    print('Accuracy: %.2f%%' % meanAccuracy)

    plotter = ROC.ROC()
    plotter.rocCurve(rocData)
    plotter.showROC()
def main(data, method, P, **kwargs):
    """Evaluate one classifier with P-fold cross-validation.

    Args:
        data: the records to partition.
        method: one of "DT", "RF", "KNN", "NB", "BST".
        P: number of partitions; with ``P <= 1`` the data is used as a
            single partition.
        **kwargs: model settings read per method -- ``RENDER_TREE`` (DT),
            ``T``, ``M``, ``bagging`` (RF), ``K``, ``scaling`` (KNN),
            ``T`` (BST).

    Raises:
        ValueError: if ``method`` is not one of the supported codes (the
            original failed later with an unbound ``model``).

    The Performance of each fold is printed; when ``P != 1`` the averaged
    accuracy, precision, recall and F1 follow.
    """
    partitions = list(split(data, P)) if P > 1 else [data]
    metrics = []
    for i in range(P):
        model = _build_model(method, kwargs)
        test = partitions[i]
        # The training set is every partition except the held-out one.
        train = []
        for j in range(P):
            if j != i:
                train += partitions[j]
        result = model.fit_transform(train, test)
        # Real lists, so the values can be consumed more than once on
        # Python 3, where map() is lazy.
        actual = [t['class'] for t in result]
        predicted = [t['assigned'] for t in result]
        metrics.append(Performance(actual, predicted))
        print(metrics[-1])
        print("")
    if not P == 1:
        print('-----------------------------')
        summary = (
            ("Accuracy:", [m.accuracy() for m in metrics]),
            ("Precision:", [m.precision() for m in metrics]),
            ("Recall:", [m.recall() for m in metrics]),
            ("F1-Measure:", [m.f1() for m in metrics]),
        )
        for label, values in summary:
            # Scores above 1 are excluded from the average, as before.
            valid = [v for v in values if v <= 1]
            print("%s %s %s" % (label, round(mean(valid) * 100, 2), "%"))


def _build_model(method, kwargs):
    """Return a fresh model for *method*; raise ValueError if unknown."""
    if method == "DT":
        return DecisionTree(kwargs['RENDER_TREE'])
    if method == "RF":
        return RandomForest(kwargs['T'], kwargs['M'], kwargs['bagging'])
    if method == "KNN":
        return KNN(kwargs['K'], kwargs['scaling'])
    if method == "NB":
        return NaiveBayes()
    if method == "BST":
        return Boost(kwargs['T'])
    raise ValueError("unknown method: %r" % (method,))
def mainWhitoutLastFlower():
    """Cross-validate Naive Bayes on iris samples from index 50 onward.

    The first 50 samples and targets are dropped before the 10-fold
    cross-validation; the mean accuracy is printed and the ROC curve shown.
    """
    iris = datasets.load_iris()
    iris.data = iris.data[50:]
    iris.target = iris.target[50:]
    dataset_with_target = concateTargetWithDataset(iris.data, iris.target)

    validator = CrossValidator.CrossValidator(
        algo=NaiveBayes.NaiveBayes(),
        dataset=dataset_with_target,
        nbFolds=10,
    )
    _, meanAccuracy, rocData = validator.score()
    print('Accuracy: %.2f%%' % meanAccuracy)

    plotter = ROC.ROC()
    plotter.rocCurve(rocData)
    plotter.showROC()
def main():
    """Train Naive Bayes on an ARFF file and report how well it tests.

    The file named by the parsed options is read into train and test
    partitions, every test example is classified, and the accuracy plus a
    K x K confusion matrix (rows: true label, columns: prediction) are
    printed.
    """
    opts = util.parse_args()
    train_partition, test_partition = util.read_arff(opts.filename)
    nb_model = NaiveBayes(train_partition)

    examples = test_partition.data
    total = len(examples)
    K = test_partition.K
    confusion_matrix = np.zeros((K, K), int)

    total_correct = 0
    for example in examples:
        y_hat = nb_model.classify(example.features)
        y = example.label
        confusion_matrix[y][y_hat] += 1
        if y_hat == y:
            total_correct += 1

    accuracy = round(total_correct / total, 6)
    print("Accuracy: {} ({} out of {} correct)".format(
        accuracy, total_correct, total))

    # Every table cell is right-aligned in a column `stretch` wide.
    stretch = 8
    columns = range(K)
    prediction_labels = " " + "".join(
        str(col + 1).rjust(stretch) for col in columns)
    top_row = " " + "-" * stretch * K
    table = "".join(
        " " + str(row + 1) + "|"
        + "".join(str(confusion_matrix[row][col]).rjust(stretch)
                  for col in columns)
        + "\n"
        for row in range(K)
    )

    print("\n\n prediction")
    print(prediction_labels)
    print(top_row)
    print(table)
def learnClassifer(self):
    """Train a Naive Bayes model on the feature table and serialise it.

    Each row of ``self.featureFactory.datatable`` becomes one instance
    whose attributes are keyed by the column index as a string and whose
    label is the matching entry of ``self.featureFactory.classes``.  The
    trained model is stored on ``self.model``.

    Returns:
        The pickled model encoded with the ``string_escape`` codec.
    """
    model = NaiveBayes()
    attributes = []
    for j, line in enumerate(self.featureFactory.datatable):
        instance = {'cases': 1, 'attributes': {}}
        for i, value in enumerate(line):
            key = str(i)
            instance['attributes'][key] = value
            # Appended once per row, so names repeat across rows.
            attributes.append(key)
        instance['label'] = self.featureFactory.classes[j]
        model.add_instances(instance)
    model.set_real(attributes)
    model.train()
    self.model = model
    return pickle.dumps(model).encode('string_escape')
def test_both():
    """Compare logistic regression and Naive Bayes on datasets 1 to 4.

    Both models are fitted and scored per dataset; the two accuracy lists
    are printed once all datasets are done.
    """
    log_res, nb_res = [], []
    for dataset in range(1, 5):
        x, y, x_test, y_test = testImport.read_data(dataset, 1)
        x_con, x_cat, y_, xt_con, xt_cat, yt = testImport.read_data(dataset, 0)

        logistic = LogRegression.Log_Regression(1, 0.005, 25000)
        bayes = NaiveBayes.NaiveBayes()
        logistic.fit(x, y)
        # NOTE(review): fitted with `y` from the first read, not `y_` from
        # the second -- kept as in the original; confirm they are equal.
        bayes.fit(x_con, x_cat, y)

        log_res.append(evaluate_acc(y_test, logistic.predict(x_test)))
        nb_res.append(evaluate_acc_NB(yt, bayes.predict(xt_con, xt_cat)))
    print(log_res)
    print(nb_res)
def test_nb_smaller_d():
    """Plot Naive Bayes accuracy while the feature count is reduced.

    For each of datasets 1 to 4 the features are cut down step by step
    along ``d_list``; the accuracy after every step is plotted as one
    curve and the figure is saved to ``nb_testing/smaller_d``.
    """
    d_list = [200, 100, 50, 40, 30, 25, 20, 10, 7, 5, 4, 3]
    for dataset in range(1, 5):
        x_con, x_cat, y, xt_con, xt_cat, yt = testImport.read_data(dataset, 0)
        scores = []
        for d in d_list:
            # The reductions are cumulative: each step starts from the
            # feature sets left by the previous one.
            if x_con is not None:
                x_con, xt_con = less_features(x_con, xt_con, d)
            if x_cat is not None:
                x_cat, xt_cat = less_features(x_cat, xt_cat, d)
            classifier = NaiveBayes.NaiveBayes()
            classifier.fit(x_con, x_cat, y)
            predictions = classifier.predict(xt_con, xt_cat)
            scores.append(evaluate_acc_NB(yt, predictions))
        plt.plot(d_list, scores)
    plt.xlabel('N')
    plt.ylabel('performance')
    plt.legend(['ionosphere', 'census', 'poker', 'credit'])
    plt.savefig('nb_testing/smaller_d')
def mainWhitoutMiddleFlower():
    """Cross-validate Naive Bayes on iris with class 1 removed.

    Samples are filtered by their own label, so data and targets stay
    aligned.  The original filtered the data by index with
    ``index < 51 or index > 100``, which kept sample 50 (a class-1
    flower) and dropped sample 100 (a class-2 flower), shifting the
    labels of the remaining samples by one position.

    Prints the mean accuracy of a 10-fold cross-validation and shows the
    ROC curve.
    """
    irisData = datasets.load_iris()
    kept = [
        (instance, label)
        for instance, label in zip(irisData.data, irisData.target)
        if label != 1
    ]
    irisData.data = [instance for instance, _ in kept]
    irisData.target = [label for _, label in kept]

    newDataset = concateTargetWithDataset(irisData.data, irisData.target)
    naiveBayes = NaiveBayes.NaiveBayes()
    crossValidator = CrossValidator.CrossValidator(algo=naiveBayes,
                                                   dataset=newDataset,
                                                   nbFolds=10)
    _scoresByFold, meanAccuracy, rocData = crossValidator.score()
    print('Accuracy: %.2f%%' % meanAccuracy)

    roc = ROC.ROC()
    roc.rocCurve(rocData)
    roc.showROC()
def main():
    """Train Naive Bayes on one ARFF file and score it on another.

    Prints the accuracy over the test partition and the confusion matrix
    (rows: true label, columns: predicted label).
    """
    opts = util.parse_args()
    train_partition = util.read_arff(opts.train_filename)
    test_partition = util.read_arff(opts.test_filename)

    # Build the Naive Bayes model from the training partition.
    nb_model = NaiveBayes(train_partition)

    # Square confusion matrix with one row/column per test label.
    size = len(test_partition.labels)
    confusion_matrix = np.zeros((size, size))

    n_correct = 0
    for example in test_partition.data:
        predicted = nb_model.classify(example.features)
        actual = example.label
        confusion_matrix[actual][predicted] += 1
        if actual == predicted:
            n_correct += 1

    n_total = test_partition.n
    print('Accuracy: {} ({} out of {} correct)'.format(
        round(n_correct / n_total, 6), n_correct, n_total))
    print(confusion_matrix)
def main(argv):
    """Run the Naive Bayes, random forest and neural network tests.

    Args:
        argv: command-line arguments without the program name.

    Options:
        -h                       print usage and exit
        -t, --tname <name>       test name
        --hls <int>              hidden layer size (default 100)
        --hlc <int>              hidden layer count (default 2)
        -n, --nsplits <int>      number of splits (default 3)
        -i, -o, --stname         still accepted for compatibility, ignored

    The original option spec ("hi:o:") never produced ``-t`` or ``-n``,
    compared with ``opt in ("-hls")`` (a substring test on a plain
    string), and left the numeric values as strings, so
    ``[HlayerSize] * HlayerCount`` could not work.  Multi-letter options
    are long options in getopt, hence ``--hls`` / ``--hlc``.
    """
    usage = ('test.py -t <testname> --hls <Hidden Layer Size> '
             '--hlc <Hidden Layer Count> -n <nsplits>')
    testname = ''
    HlayerSize = 100
    HlayerCount = 2
    nsplits = 3
    try:
        opts, args = getopt.getopt(
            argv, "ht:n:i:o:",
            ["tname=", "stname=", "hls=", "hlc=", "nsplits="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    try:
        for opt, arg in opts:
            if opt == '-h':
                print(usage)
                sys.exit()
            elif opt in ("-t", "--tname"):
                testname = arg
            elif opt == "--hls":
                HlayerSize = int(arg)
            elif opt == "--hlc":
                HlayerCount = int(arg)
            elif opt in ("-n", "--nsplits"):
                nsplits = int(arg)
    except ValueError:
        # A numeric option received a non-integer value.
        print(usage)
        sys.exit(2)

    Hlayer = [HlayerSize] * HlayerCount

    NB = nb.NaiveBayes(testname=testname, subtestname='naivebayes')
    X, Y = NB.loadMatrixFromFile()
    res.getResults(NB)

    RF = rf.RandomForest(testname=testname, subtestname='randomforest')
    X, Y = RF.loadMatrixFromFile()
    res.getResults(RF)

    NN = nn.NeuralNetwork(testname=testname, subtestname='neuralnetwork',
                          HlayerSizes=Hlayer, nsplits=nsplits)
    X, Y = NN.loadMatrixFromFile()
    res.getResults(NN)
def main():
    """Train Naive Bayes, classify the test partition and report results.

    Prints the partition sizes, the accuracy (the confusion-matrix
    diagonal divided by the number of test examples) and the confusion
    matrix itself.
    """
    # Process the data
    opts = util.parse_args()
    train_partition = util.read_arff(opts.train_filename)
    test_partition = util.read_arff(opts.test_filename)

    # sanity check
    print("num train =", train_partition.n, ", num classes =", train_partition.K)
    print("num test =", test_partition.n, ", num classes =", test_partition.K)

    nb_model = NaiveBayes(train_partition)

    # Collect predicted and true labels for every test example.
    y_real, y_h = [], []
    for example in test_partition.data:
        y_hat = nb_model.classify(example.features)
        y_real.append(int(example.label))
        y_h.append(y_hat)

    ln = len(nb_model.classes)
    l = len(test_partition.data)

    # Zero-filled square matrix: rows are true labels, columns predictions.
    confusion_matrix = np.zeros((ln, ln))
    for actual, predicted in zip(y_real, y_h):
        confusion_matrix[actual][predicted] += 1

    # Correct predictions sit on the diagonal.
    n = sum(confusion_matrix[i][i] for i in range(ln))
    accuracy = n / (l)

    print("Accuracy", round(accuracy, 7), "(", int(n), " out of ", l , " correct)")
    print("Confusion Matrix:")
    print(confusion_matrix)
def test_nb_smaller_n():
    """Plot Naive Bayes accuracy while the training set is shrunk.

    For each of datasets 1 to 4 the training cases are reduced step by
    step along ``n_list``; the accuracy after every step is plotted as
    one curve and the figure is saved to ``nb_testing/smaller_n``.
    """
    n_list = [
        1000, 500, 400, 300, 250, 200, 150, 100, 75, 50, 40, 30, 20, 15, 10
    ]
    for i in range(1, 5):
        smaller_n = []
        x_con, x_cat, y, xt_con, xt_cat, yt = testImport.read_data(i, 0)
        for n in n_list:
            # Exactly one reduction per step.  The original used a second
            # plain `if`, so when both feature sets existed, x_con and y
            # were reduced again without x_cat.
            if x_con is not None and x_cat is not None:
                x_con, x_cat, y = less_cases_separate(x_con, x_cat, y, n)
            elif x_con is not None:
                x_con, y = less_cases_together(x_con, y, n)
            else:
                x_cat, y = less_cases_together(x_cat, y, n)
            model = NaiveBayes.NaiveBayes()
            model.fit(x_con, x_cat, y)
            smaller_n.append(evaluate_acc_NB(yt, model.predict(xt_con, xt_cat)))
        plt.plot(n_list, smaller_n)
    plt.xlabel('N')
    plt.ylabel('performance')
    plt.legend(['ionosphere', 'census', 'poker', 'credit'])
    plt.savefig('nb_testing/smaller_n')
def cross_eval(directory, parts, verbose=False):
    """Perform cross-validation over the ``part<N>`` sub-directories.

    Each part in turn is the test set while the remaining parts train a
    fresh classifier.

    Args:
        directory: directory containing ``part1`` .. ``part<parts>``.
        parts: number of parts.
        verbose: when true, print progress and one line per test document.

    Returns:
        The fraction of correctly classified documents, as a float.

    Raises:
        ZeroDivisionError: if no document was classified.
    """
    correct = 0
    total = 0
    for i in range(1, parts + 1):
        testlist = []
        trainlist = []
        for j in range(1, parts + 1):
            files = glob.glob("%s/part%d/*" % (directory, j))
            if i == j:
                testlist.extend(files)
            else:
                trainlist.extend(files)

        classifier = NaiveBayes.NaiveBayes(getwords)
        # Progress lines are built as single strings so the same call
        # works on Python 2 and Python 3; the print statements (one with a
        # trailing comma) were a syntax error on Python 3.
        if verbose:
            print("%s %s" % (i, "\tTraining classifier"))
        for doc in trainlist:
            train(classifier, doc, category(doc))

        if verbose:
            print("\tClassifying")
        for doc in testlist:
            bestcat = classify(classifier, doc)
            is_correct = bestcat == category(doc)
            if is_correct:
                correct += 1
            if verbose:
                verdict = "correct" if is_correct else "wrong"
                print("\t%s : %s - %s" % (doc, bestcat, verdict))
            total += 1
    return float(correct) / float(total)
def test():
    """Benchmark the scratch Naive Bayes against ``MultinomialNB``.

    Ten rounds are run; each generates a fresh 5-class problem with 2000
    samples and 20 features, binarizes it, fits and predicts with both
    models on the same data, and records the misclassification counts and
    timings.

    Returns:
        ``(var_scratch, var_official, mean_scratch, mean_official)`` of
        the per-round misclassification counts.
    """
    nb_samples = 2000
    nb_rounds = 10
    x = np.zeros((nb_rounds))  # scratch-model errors per round
    y = np.zeros((nb_rounds))  # reference-model errors per round
    x_time = 0.0
    y_time = 0.0
    for i in range(nb_rounds):
        bnbdata_X, bnbdata_Y = make_classification(n_samples=nb_samples,
                                                   n_features=20,
                                                   n_informative=20,
                                                   n_classes=5,
                                                   n_redundant=0)
        # binarize returns a new array by default; the original discarded
        # it, leaving the real-valued features untouched.
        bnbdata_X = binarize(bnbdata_X)

        bnb = MultinomialNB()
        start_time = time.time()
        y_pred_official = bnb.fit(bnbdata_X, bnbdata_Y).predict(bnbdata_X)
        finish_time = time.time()
        y_time += (finish_time - start_time)

        # NOTE(review): num_class=20 while the data has 5 classes (and 20
        # features) -- confirm what the constructor expects.
        mnb = nb.NaiveBayes(num_class=20)
        start_time = time.time()
        mnb.fit(bnbdata_X, bnbdata_Y)
        y_pred_scratch = mnb.predict(bnbdata_X)
        finish_time = time.time()
        x_time += (finish_time - start_time)

        print("mnb: ", (bnbdata_Y != y_pred_scratch).sum(), "bnb: ",
              (bnbdata_Y != y_pred_official).sum())
        y[i] = (bnbdata_Y != y_pred_official).sum()
        x[i] = (bnbdata_Y != y_pred_scratch).sum()
    print("mnb_ave_time: ", x_time / nb_rounds, "bnb_avg_time: ",
          y_time / nb_rounds)
    return np.var(x), np.var(y), np.average(x), np.average(y)
def choix_classifieurs(self, X_train, y_train, X_test, y_test):
    """Search for the best classifier of each method.

    Runs the parameter search of Naive Bayes, the decision tree, the SVM
    and the perceptron; the K-nearest-neighbours wrapper is created
    without a search.

    Returns:
        ``(clfNB, clfTree, kNN, clfPerceptr, clfSVM)``.
    """
    print(
        " \n\t\t--- Recherche des meilleurs classifieurs pour chaque méthode ---\n\n"
    )

    # Naive Bayes
    print(" --- Recherche pour Naive Bayes ---\n")
    clfNB = nb.NaiveBayes().choixNB(X_train, y_train, X_test, y_test)

    # Decision tree
    print(" --- Recherche pour Arbre de Decision ---\n")
    clfTree, _ = dt.DecisionTree().recherche_param(
        X_train, y_train, X_test, y_test)

    # K nearest neighbours: no parameter search
    print(
        "\n --- Pas de recherche de paramètres pour les K plus proches voisins ---\n"
    )
    kNN = knn.KNN()

    # SVM
    print(" --- Recherche pour la SVM ---\n")
    clfSVM = svm.SVM().hyperParameter(X_train, y_train)

    # Perceptron
    print(" --- Recherche pour le Perceptron ---\n")
    clfPerceptr = perceptr.Perceptr().rechercheHypParm(
        X_train, y_train, X_test, y_test)

    return (clfNB, clfTree, kNN, clfPerceptr, clfSVM)
# 初始化方差,生成样本与标签 sigma = np.zeros((k, 3, 3)) for i in range(k): sigma[i, :, :] = np.diag(np.random.randint(10, 25, size=(3, ))) sample, target = generate_random(sigma, N) feature_names = ['x_label', 'y_label', 'z_label'] # 特征数 target_names = ['gaussian1', 'gaussian2', 'gaussian3', 'gaussian4'] # 类别 data = Bunch(sample=sample, feature_names=feature_names, target=target, target_names=target_names) sample_t, target_t = generate_random(sigma, N) data_t = Bunch(sample=sample_t, target=target_t) # 训练模型,计算精确度 model = NaiveBayes() model.fit(data.sample, data.target.flatten()) tar_train = np.array([model.predict(x) for x in data.sample], dtype=np.uint8) tar_test = np.array([model.predict(x) for x in data_t.sample], dtype=np.uint8) acc_train = model.score(data.sample, data.target.flatten()) acc_test = model.score(data_t.sample, data_t.target.flatten()) print_list = [acc_train * 100, acc_test * 100] print( 'Accuracy on training set: {0[0]:.2f}%, accuracy on testing set: {0[1]:.2f}%.' .format(print_list)) # 测试一个数据 summary = model.normalized_prob(data_t.sample[100]) print(summary)
max_iter=10, alpha=1e-4, solver='sgd', verbose=10, random_state=1, learning_rate_init=.1) nn.fit(x_train, y_train) y_pred_nn = nn.predict(x_test) end3 = time.time() nn_time = end3 - start3 #NB Model start4 = time.time() nb = NaiveBayes.NaiveBayes() nb.fit(x_train, y_train) y_pred_nb = nb.predict(x_test) end4 = time.time() nb_time = end4 - start4 print("SVM Time: {:0.2f} minute".format(svm_time / 60.0)) print("KNN Time: {:0.2f} minute".format(knn_time / 60.0)) print("NN Time: {:0.2f} minute".format(nn_time / 60.0)) print("NB Time: {:0.2f} minute".format(nb_time / 60.0)) # SVM report and analysis