def main(args):
    """Grid-search the Laplace smoothing parameter on [0, 0.2] and report
    dev-set metrics for the best setting.

    Args:
        args: parsed CLI namespace providing training_dir, development_dir
            and stemming (consumed by reader.load_dataset).
    """
    # Candidate smoothing values; index 0 is laplace == 0 (no smoothing),
    # which is skipped during the search and left scored as 0.
    laplace_number_set = np.linspace(0, 0.2, 2001)
    print(laplace_number_set)

    # Load the data once -- it is invariant across candidate values (the
    # original reloaded it on every one of the ~2000 iterations).
    train_set, train_labels, dev_set, dev_labels = reader.load_dataset(
        args.training_dir, args.development_dir, args.stemming)

    # Sized from the candidate set so every value (including the last one,
    # previously dropped by an off-by-one) gets evaluated.
    laplace_performance = np.zeros(len(laplace_number_set))
    for number in range(1, len(laplace_number_set)):  # skip laplace == 0
        predicted_labels = nb.naiveBayes(
            train_set, train_labels, dev_set, laplace_number_set[number])
        accuracy, f1, precision, recall = compute_accuracies(
            predicted_labels, dev_set, dev_labels)
        laplace_performance[number] = accuracy

    # np.argmax returns an *index*; translate it into the actual smoothing
    # value (the original passed the raw index to naiveBayes, which is a bug).
    best_laplace = laplace_number_set[np.argmax(laplace_performance)]
    predicted_labels = nb.naiveBayes(train_set, train_labels, dev_set,
                                     best_laplace)
    accuracy, f1, precision, recall = compute_accuracies(
        predicted_labels, dev_set, dev_labels)

    print("best laplace parameter:", best_laplace)
    print("Accuracy:", accuracy)
    print("F1-Score:", f1)
    print("Precision:", precision)
    print("Recall:", recall)
def test_unigram_dev_stem_false_lower_false():
    """Score the unigram naive Bayes model on the dev set (no stemming,
    no lowercasing) and return a Gradescope-style JSON result.

    Returns:
        str: JSON blob with name/score/max_score/visibility; score is 0 when
        the prediction count does not match the dev-label count, otherwise
        5 points per accuracy threshold reached (max 20).
    """
    print("Running unigram test..." + '\n')
    train_set, train_labels, dev_set, dev_labels = reader.load_dataset(
        "data/spam_data/train", "data/spam_data/dev",
        stemming=False, lower_case=False, use_tqdm=False
    )
    predicted_labels = nb.naiveBayes(
        train_set, train_labels, dev_set,
        smoothing_parameter=1.0, pos_prior=0.5)

    # A length mismatch means the student's predictions are malformed;
    # short-circuit with a zero score rather than crashing in scoring.
    if len(predicted_labels) != len(dev_labels):
        print("The length of the list of predictions is not equivalent to the length of the list of development labels.")
        errorDict = {
            'name': 'Unigram test on dev set without stemming and without lowercase',
            'score': 0,
            'max_score': 20,
            'visibility': 'visible'
        }
        return json.dumps(errorDict, indent=1)

    (
        accuracy,
        f1,
        precision,
        recall,
    ) = mp2.compute_accuracies(predicted_labels, dev_set, dev_labels)
    print("Accuracy:", accuracy)
    print("F1-Score:", f1)
    print("Precision:", precision)
    print("Recall:", recall)

    # 5 points per threshold reached; the loop replaces four copy-pasted
    # if/else blocks and produces byte-identical output.
    total_score = 0
    for threshold in (0.81, 0.86, 0.91, 0.95):
        if accuracy >= threshold:
            total_score += 5
            print("+ 5 points for accuracy above " + str(threshold))
        else:
            print("Accuracy needs to be above " + str(threshold))

    resultDict = {
        'name': 'Unigram test on dev set without stemming and without lowercase',
        'score': total_score,
        'max_score': 20,
        'visibility': 'visible'
    }
    return json.dumps(resultDict, indent=1)
def main(args):
    """Train a naive Bayes model and print dev-set evaluation metrics.

    Args:
        args: parsed CLI namespace with training_dir, development_dir,
            stemming and laplace.
    """
    loaded = reader.load_dataset(
        args.training_dir, args.development_dir, args.stemming)
    training_examples, training_labels, development_examples, development_labels = loaded

    predictions = nb.naiveBayes(
        training_examples, training_labels, development_examples, args.laplace)
    metrics = compute_accuracies(
        predictions, development_examples, development_labels)

    # Print each metric on its own line, in the fixed report order.
    for caption, value in zip(
            ("Accuracy:", "F1-Score:", "Precision:", "Recall:"), metrics):
        print(caption, value)
def main(args):
    """Train a naive Bayes model and print accuracy plus the confusion
    counts (false/true positives and negatives) on the dev set.

    Args:
        args: parsed CLI namespace with training_dir and development_dir.
    """
    # Modify stemming and lower case below. Note that our test cases may use
    # both settings of the two parameters.
    train_set, train_labels, dev_set, dev_labels = reader.load_dataset(
        args.training_dir, args.development_dir,
        stemming=False, lower_case=False)
    predicted_labels = nb.naiveBayes(train_set, train_labels, dev_set)
    accuracy, false_positive, false_negative, true_positive, true_negative = \
        compute_accuracies(predicted_labels, dev_labels)
    print("Accuracy:", accuracy)
    print("False Positive", false_positive)
    # Fixed user-facing typo: was "Fale Negative".
    print("False Negative", false_negative)
    print("True Positive", true_positive)
    print("True Negative", true_negative)
def all_algo_model(self):
    """Run every enabled algorithm demo in sequence, each announced by a
    framed banner. Decision Tree and KNN are currently disabled.
    """
    frame = " \n----------------------------------\n"

    def announce(title):
        # Print the section title framed by separator lines.
        print(frame)
        print(title)
        print(frame)

    announce("\n Decision Tree \n")
    # decisionTree().decision_tree_algo()

    announce("\n Gradient Descent \n")
    gradientDescent().gradient_descent_algo()

    announce("\n K-Nearest Neighbour \n")
    # KNNAlgo().KNN__model_algo()

    announce("\n Linear Regression \n")
    linearRegression().linear_regression_algo()

    announce("\n Naive Bayes \n")
    naiveBayes().naive_bayes_algo()

    announce("\n Support Vector Machine \n")
    svmModule().svm_model_algo()

    print("\n---------- End ---------------------\n")
def main():
    """Entry point: load patient records, stratify them into ten folds,
    then cross-validate a kNN and a naive Bayes classifier."""
    # List of patient objects parsed from the CSV source.
    patients = parse_csv()
    # Build the ten stratified folds once and reuse them for both models.
    stratified_folds = stratify_data(patients)

    # Instantiate the two classifiers under comparison.
    knn_classifier = kNN()
    bayes_classifier = naiveBayes()

    # NOTE(review): the first call passes the fold count explicitly while the
    # second relies on a default -- presumably also 10; confirm at definition.
    ten_fold_strat_cross_validation(knn_classifier, stratified_folds, 10)
    ten_fold_strat_cross_validation(bayes_classifier, stratified_folds)
def main(args):
    """Train a naive Bayes model and write one "<id>,<label>" line per test
    example to answer.txt, where label 0 maps to SARCASM and anything else
    to NOT_SARCASM.

    Args:
        args: parsed CLI namespace with training_dir, development_dir,
            stemming and laplace.
    """
    train_set, train_labels, dev_set, dev_labels = reader.load_dataset(
        args.training_dir, args.development_dir, args.stemming)
    predicted_labels = nb.naiveBayes(
        train_set, train_labels, dev_set, args.laplace)

    # Context managers guarantee both handles are closed; the original leaked
    # them and relied on interpreter exit to flush answer.txt.
    with open('./data/test.jsonl', 'r') as test_file:
        test = test_file.readlines()
    with open('answer.txt', 'w') as answer:
        for i, line in enumerate(test):
            record_id = json.loads(line)['id']
            tag = "SARCASM" if predicted_labels[i] == 0 else "NOT_SARCASM"
            answer.write(record_id)
            answer.write(",")
            answer.write(tag)
            answer.write("\n")
len_bootstrap = 100 # create bootstrap with random data from data train list_bootstrap = {} for i in range(num_bootstrap): bootstrap = [] for j in range(len_bootstrap): randomData = dataTrain[np.random.randint(0, len(dataTrain))] bootstrap.append(randomData) list_bootstrap[i] = np.copy(bootstrap) # predict class result from data test with each bootstrap model_result = {} for i in range(num_bootstrap): dataTrain = list_bootstrap[i] model_result[i] = nb.naiveBayes(dataTrain, dataTest, class_index=2) # voting all model for final result final_result = [] for i in range(len(dataTest)): temp = [model_result[model_idx][i] for model_idx in model_result] temp = collections.Counter(temp) result = max(temp.items(), key=operator.itemgetter(1))[0] final_result.append(result) print(final_result) # save result in csv file data_file = open('Result.csv', 'w', newline='') with data_file: writer = csv.writer(data_file) for row in final_result:
import scipy.io as io
import preprocessor as pre
from naive_bayes import naiveBayes
from logit import logisitcRegression

# Load the MNIST train/test splits from the MATLAB archive.
mnist = io.loadmat('mnist_data.mat')
train_images, train_targets = mnist['trX'], mnist['trY']
test_images, test_targets = mnist['tsX'], mnist['tsY']

# extracting features (average value of pixels, standard deviation of pixels)
train_features = pre.extractFeatures(train_images)
test_features = pre.extractFeatures(test_images)

# Fit/evaluate both models on the same feature representation.
(mean_7, mean_8, sd_7, sd_8,
 accuracy_bayesian_7, accuracy_bayesian_8,
 accuracy_bayesian_total) = naiveBayes(
    train_features, train_targets[0], test_features, test_targets[0])
(w, accuracy_logit_7, accuracy_logit_8,
 accuracy_logit_total) = logisitcRegression(
    train_features, train_targets[0], test_features, test_targets[0])

# Report per-digit and overall accuracy for each model (as percentages).
print("Accuracy for Naive Bayes:")
print("For Digit 7: ", accuracy_bayesian_7 * 100)
print("For Digit 8: ", accuracy_bayesian_8 * 100)
print("Overall: ", accuracy_bayesian_total * 100)
print("\n")
print("Accuracy for Logistic Regression: ")
print("For Digit 7: ", accuracy_logit_7 * 100)
print("For Digit 8: ", accuracy_logit_8 * 100)
print("Overall: ", accuracy_logit_total * 100)