def performMultiClassLogistic(inputDataClass, maxDegree, learnRate, GDthreshold,
                              isRegularized, lambd, drawConfusion=False, drawSquaredLoss=False):
    train_data = inputDataClass.Train
    test_data = inputDataClass.Test
    Ytrue = test_data[:, -1]

    # Scikit-learn baseline
    phiX = linearLogisticModels.calcPhiX(train_data[:, :-1], maxDegree)
    clf = LogisticRegression(solver='lbfgs', multi_class='multinomial').fit(
        phiX, train_data[:, -1])
    phiXTest = linearLogisticModels.calcPhiX(test_data[:, :-1], maxDegree)
    Ypred = clf.predict(phiXTest)
    acc = performanceAnalyser.calcAccuracyTotal(Ypred, Ytrue)
    print("Multi Class Logistic model Accuracy (Scikit-learn) " + str(acc))

    # Our implementation
    multi_class_logistic_model = linearLogisticModels.MultiClassLogistic(
        maxDegree, learnRate, GDthreshold, isRegularized, lambd)
    multi_class_logistic_model.train(train_data)
    Ypred = multi_class_logistic_model.test(test_data[:, :-1])
    acc = performanceAnalyser.calcAccuracyTotal(Ypred, Ytrue)
    print("Multi Class Logistic model Accuracy " + str(acc))

    if drawConfusion:
        confusion = performanceAnalyser.getConfusionMatrix(Ytrue, Ypred)
        Visualization.visualizeConfusion(confusion)

    if drawSquaredLoss:
        Visualization.visualizeLossvsIteration(multi_class_logistic_model.squaredLoss)

    return Ytrue, Ypred
def performLogisticModels(inputDataClass, maxDegree, learnRate, GDthreshold,
                          isRegularized, lambd, drawConfusion=False, drawLikelihood=False):
    train_data = inputDataClass.Train
    test_data = inputDataClass.Test
    Ytrue = test_data[:, -1]

    # Scikit Learn
    phiX = linearLogisticModels.calcPhiX(train_data[:, :-1], maxDegree)
    clf = LogisticRegression().fit(phiX, train_data[:, -1])
    phiXTest = linearLogisticModels.calcPhiX(test_data[:, :-1], maxDegree)
    Ypred = clf.predict(phiXTest)
    acc = performanceAnalyser.calcAccuracyTotal(Ypred, Ytrue)
    print("Logistic model Accuracy (Scikit-learn) " + str(acc))

    # Our implementation
    logistic_model = linearLogisticModels.LogisticModels(
        maxDegree, learnRate, GDthreshold, isRegularized, lambd)
    logistic_model.train(train_data)
    Ypred = logistic_model.test(test_data[:, :-1])
    acc = performanceAnalyser.calcAccuracyTotal(Ypred, Ytrue)
    print("Logistic model Accuracy " + str(acc))

    if drawConfusion:
        confusion = performanceAnalyser.getConfusionMatrix(Ytrue, Ypred)
        Visualization.visualizeConfusion(confusion)

    if drawLikelihood:
        Visualization.visualizeLikelihoodvsIteration(logistic_model.likelihood)

    return Ytrue, Ypred
def performLinearModels(inputDataClass, maxDegree, isRegularized, lambd,
                        isRegress=False, drawConfusion=False, drawScatter=False):
    train_data = inputDataClass.Train
    test_data = inputDataClass.Test
    Ytrue = test_data[:, -1]

    # Scikit-learn Regression
    reg = LinearRegression(fit_intercept=True, copy_X=True).fit(
        train_data[:, :-1], train_data[:, -1])
    Ypred = reg.predict(test_data[:, :-1])
    if isRegress:
        rms = performanceAnalyser.calcRootMeanSquareRegression(Ypred, Ytrue)
        print("Linear model rms (Scikit-learn) " + str(rms))
    else:
        # Threshold the regression output at 0.5 for binary classification.
        Ypred = (Ypred > 0.5).astype(int)
        acc = performanceAnalyser.calcAccuracyTotal(Ypred, Ytrue)
        print("Linear model Accuracy (Scikit-learn) " + str(acc))

    # Our implementation
    linear_model = linearLogisticModels.LinearModels(maxDegree, isRegularized, lambd)
    linear_model.train(train_data)
    Ypred = linear_model.test(test_data[:, :-1], isRegress)
    if isRegress:
        rms = performanceAnalyser.calcRootMeanSquareRegression(Ypred, Ytrue)
        print("Linear model rms " + str(rms))
        r2Score = performanceAnalyser.R2(Ypred, Ytrue)
        print("Linear model R2 Score " + str(r2Score))
        if drawScatter:
            Visualization.visualizeDataRegression(
                train_data[:, :-1], train_data[:, -1], linear_model.W)
    else:
        acc = performanceAnalyser.calcAccuracyTotal(Ypred, Ytrue)
        print("Linear model Accuracy " + str(acc))

    if drawConfusion:
        confusion = performanceAnalyser.getConfusionMatrix(Ytrue, Ypred)
        Visualization.visualizeConfusion(confusion)

    return Ytrue, Ypred
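# A minimal driver sketch for the linear/logistic experiments above. The CSV file
# names and hyper-parameter values here are assumptions (borrowed from the
# medical-data main() in this repo); adjust them to the dataset at hand.
#
# inp = inputReader.InputReader(["Medical_data.csv", "test_medical.csv"], 0)
# performLinearModels(inp, maxDegree=1, isRegularized=False, lambd=0.0, isRegress=False)
# performLogisticModels(inp, maxDegree=1, learnRate=0.01, GDthreshold=1e-4,
#                       isRegularized=False, lambd=0.0, drawConfusion=True)
# performMultiClassLogistic(inp, maxDegree=1, learnRate=0.01, GDthreshold=1e-4,
#                           isRegularized=False, lambd=0.0, drawSquaredLoss=True)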
def main(mode, kernel_type):
    X, Y, x, y = get_data(mode)
    x_test = x
    y_test = y
    Y_true = y_test

    degree = 3
    cost = 1
    tol = 0.001
    n_crossval = 5

    # classification
    if mode != 3:
        svm_type = 0
        Y_pred, ACC, MSE, SCC = train(X, Y, x_test, y_test, svm_type=svm_type,
                                      kernel_type=kernel_type, degree=degree, cost=cost,
                                      tolerance=tol, n_crossval=n_crossval)
        acc = performanceAnalyser.calcAccuracyTotal(Y_pred, Y_true)
        precision, recall, f1score = performanceAnalyser.goodness(Y_true, Y_pred)
        confMat = performanceAnalyser.getConfusionMatrix(Y_true, Y_pred)
        print(f'\n\nAccuracy: {acc}\nPrecision: {precision}\nRecall: {recall}\n'
              f'F1score: {f1score}\nConfusion Matrix: {confMat}\n')
    else:
        # regression
        svm_type = 4
        Y_pred, ACC, MSE, SCC = train(X, Y, x_test, y_test, svm_type=svm_type,
                                      kernel_type=kernel_type, degree=degree, cost=cost,
                                      tolerance=tol, n_crossval=n_crossval)
        rmse = performanceAnalyser.calcRootMeanSquareRegression(
            np.asarray(Y_pred), np.asarray(Y_true))
        exp_var = explained_variance_score(Y_true, Y_pred)
        mse, r2 = performanceAnalyser.R2(np.asarray(Y_pred), np.asarray(Y_true))
        print(f'\n\nMSE: {mse}\nRMSE: {rmse}\nR2: {r2}\nExplained Variance: {exp_var}\n')
def main():
    x = ["Medical_data.csv", "test_medical.csv"]
    inp = inputReader.InputReader(x, 0)
    training_data = inp.Train
    test_data = inp.Test[:, :-1]
    ytrue = inp.Test[:, -1]

    input_data = training_data[:, :-1]
    labels = training_data[:, -1]

    per = multi_perceptron(input_data, labels, test_data)
    per.process(19)
    ypred = per.ypred()
    print(performanceAnalyser.calcAccuracyTotal(ypred, ytrue))
def performPerceptron(inputDataClass, numIter, isBinary):
    train_data = inputDataClass.Train
    test_data = inputDataClass.Test
    Ytrue = test_data[:, -1]

    clf = Perceptron().fit(train_data[:, :-1], train_data[:, -1])
    Ypred = clf.predict(test_data[:, :-1])
    acc = performanceAnalyser.calcAccuracyTotal(Ypred, Ytrue)
    print("Perceptron model Accuracy (Scikit-learn) " + str(acc))

    # Our perceptron
    if isBinary:
        percept = perceptron.percep_2(train_data[:, :-1], train_data[:, -1], test_data[:, :-1])
    else:
        percept = perceptron.multi_perceptron(train_data[:, :-1], train_data[:, -1], test_data[:, :-1])
    percept.process(numIter)
    Ypred = percept.ypred()
    acc = performanceAnalyser.calcAccuracyTotal(Ypred, Ytrue)
    print("Perceptron model Accuracy " + str(acc))

    return Ytrue, Ypred
def performMultiClassLinear(inputDataClass, maxDegree, learnRate, isRegularized, lambd,
                            drawConfusion=False):
    multi_class_linear_model = linearLogisticModels.MultiClassLinear(
        maxDegree, learnRate, isRegularized, lambd)
    multi_class_linear_model.train(inputDataClass.Train)

    Ytrue = inputDataClass.Test[:, -1]
    Ypred = multi_class_linear_model.test(inputDataClass.Test[:, :-1])
    acc = performanceAnalyser.calcAccuracyTotal(Ypred, Ytrue)
    print("Multi Class Linear model Accuracy " + str(acc))

    if drawConfusion:
        confusion = performanceAnalyser.getConfusionMatrix(Ytrue, Ypred)
        Visualization.visualizeConfusion(confusion)

    return Ytrue, Ypred
def performKNN(inputDataClass, nearestNeighbours, mode, label_with_distance=False):
    covar = -1
    if mode == 3:
        covar = performanceAnalyser.getFullCovariance(inputDataClass.Train[:, :-1])

    knn = KNN.KNN(nearestNeighbours, inputDataClass.Train[:, :-1], inputDataClass.Test[:, :-1],
                  inputDataClass.Train[:, -1], label_with_distance=label_with_distance,
                  mode=mode, covar=covar)
    knn.allocate()
    Ypred = knn.labels

    Ytrue = inputDataClass.Test[:, -1]
    print("Testing Accuracy = " + str(performanceAnalyser.calcAccuracyTotal(Ypred, Ytrue)))

    return Ytrue, Ypred
def performKMeans(inputDataClass, k, mode, num_runs, visualize=False):
    covar = -1
    if mode == 3:
        covar = performanceAnalyser.getFullCovariance(inputDataClass.Train[:, :-1])

    labels, means, rms, Ypred = kmeans.kfit(inputDataClass.Train[:, :-1], k,
                                            inputDataClass.Train[:, -1],
                                            inputDataClass.Test[:, :-1],
                                            num_runs=num_runs, mode=mode, covar=covar)
    print("rms = " + str(rms))
    print("Kmeans done")

    Ytrue = inputDataClass.Test[:, -1]
    print("Testing Accuracy = " + str(performanceAnalyser.calcAccuracyTotal(Ypred, Ytrue)))

    if visualize:
        Visualization.visualizeKMeans(inputDataClass.Train[:, :-1], labels, k)
        print("Kmeans visualized")

    return Ytrue, Ypred
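# Example driver sketch for the two non-parametric models above. The file names,
# k, nearestNeighbours and mode values are assumptions; mode == 3 is the only mode
# for which the full feature covariance is computed and passed through.
#
# inp = inputReader.InputReader(["Medical_data.csv", "test_medical.csv"], 0)
# performKNN(inp, nearestNeighbours=5, mode=3)
# performKMeans(inp, k=3, mode=0, num_runs=10, visualize=True)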
def performBayes(inputDataClass, drawPrecisionRecall=False, drawConfusion=False):
    """################################# Bayes Classifier #############################################"""
    ## Sklearn
    # print("\nSklearn Naive Bayes")
    # clf = GaussianNB()
    # clf.fit(inputDataClass.Train[:, :-1], inputDataClass.Train[:, -1])
    # Ypred = clf.predict(inputDataClass.Train[:, :-1])
    # Ytrue = inputDataClass.Train[:, -1]
    # print("Training Accuracy = " + str(performanceAnalyser.calcAccuracyTotal(Ypred, Ytrue)))
    # Ypred = clf.predict(inputDataClass.Test[:, :-1])
    # Ytrue = inputDataClass.Test[:, -1]
    # print("Testing Accuracy = " + str(performanceAnalyser.calcAccuracyTotal(Ypred, Ytrue)))

    print("\nMy Naive Bayes")
    # bayesClassifier = Bayes.Bayes(isNaive=False, distribution=[0 for i in range(inputDataClass.Train.shape[1] - 1)])
    bayesClassifier = Bayes.Bayes(isNaive=True, distribution=[0, 0, 1, 1, 0])
    bayesClassifier.train(inputDataClass.Train)
    print("Training of model done.")

    Ypred = bayesClassifier.fit(inputDataClass.Train)
    Ytrue = inputDataClass.Train[:, -1]
    print("Training Accuracy = " + str(performanceAnalyser.calcAccuracyTotal(Ypred, Ytrue)))

    Ypred = bayesClassifier.fit(inputDataClass.Test)
    Ytrue = inputDataClass.Test[:, -1]
    print("Testing Accuracy = " + str(performanceAnalyser.calcAccuracyTotal(Ypred, Ytrue)))
    print("Prediction done.")

    if drawConfusion:
        confusion = performanceAnalyser.getConfusionMatrix(Ytrue, Ypred)
        Visualization.visualizeConfusion(confusion)

    if drawPrecisionRecall:
        ############################ precision-recall curve #############################
        threshold = np.arange(0.9, 0.1, -0.1)
        probas = bayesClassifier.get_probas()
        # Normalise each sample's per-class scores so they sum to 1.
        for dic in probas:
            sums = 0.0
            for item in dic:
                sums += dic[item]
            for item in dic:
                dic[item] = dic[item] / sums

        roc = ROC.Roc(Ytrue, probas, threshold, '')
        roc.Roc_gen()

        precision, recall, _ = precision_recall_curve(Ytrue, probas)
        plt.step(recall, precision, color='b', alpha=0.2, where='post')
        plt.fill_between(recall, precision, step='post', alpha=0.2, color='b')
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.ylim([0.0, 1.05])
        plt.xlim([0.0, 1.0])
        plt.title('Precision Recall Curve')

    return Ytrue, Ypred
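# Note / sketch: sklearn's precision_recall_curve expects a 1-D array of scores for
# the positive class, whereas `probas` above is a list of per-class dictionaries.
# Assuming each dictionary is keyed by class label, a helper like the one below
# (its name and the positive label are illustrative, not part of the repo) would
# produce the array form, e.g. precision_recall_curve(Ytrue, _positive_class_scores(probas)).
def _positive_class_scores(probas, positive_label=1):
    # Pull the normalised probability of the positive class from each sample's dict;
    # a missing key defaults to 0.0 so the output stays aligned with Ytrue.
    return np.array([dic.get(positive_label, 0.0) for dic in probas])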
    Y_pred = []
    for i in range(x_test.shape[0]):
        z = x_test[i][:-1]
        # argmin: assign the class whose projected mean is closest to the projected sample
        dist = float("inf")
        for c in classes:
            a, b = np.matmul(W, z.transpose()), np.matmul(W, means[c].transpose())
            curr_dist = distance(dist_metric, a, b)
            if curr_dist < dist:
                dist = curr_dist
                curr_class = c
        Y_pred.append(curr_class)

    # analyse performance
    Y_true = x_test[:, -1]
    acc = performanceAnalyser.calcAccuracyTotal(Y_pred, Y_true)
    precision, recall, f1score = performanceAnalyser.goodness(Y_true, Y_pred)
    confMat = performanceAnalyser.getConfusionMatrix(Y_true, Y_pred)
    print(f'Accuracy: {acc}\nPrecision: {precision}\nRecall: {recall}\nF1score: {f1score}\nConfusion Matrix: {confMat}\n')

    return Y_pred, acc, precision, recall, f1score, confMat


if __name__ == '__main__':
    data = {0: 'Medical', 1: 'F-MNIST', 2: 'Railway', 3: 'River'}
    mode = int(sys.argv[1])
    max_dim = int(sys.argv[2])
    print(f'--> {data[mode]} dataset')
    print(f'--> \n')
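# Illustrative sketch (not part of the repo): the nearest-projected-mean rule used
# in the prediction loop above, written as a standalone helper with plain Euclidean
# distance. The repo's own distance(dist_metric, a, b) helper may apply a different
# metric depending on dist_metric.
def _nearest_projected_mean(z, W, class_means):
    """Return the class whose projected mean W @ mu is closest to W @ z (Euclidean)."""
    projected_z = np.matmul(W, z)
    best_class, best_dist = None, float("inf")
    for c, mu in class_means.items():
        d = np.linalg.norm(projected_z - np.matmul(W, mu))
        if d < best_dist:
            best_class, best_dist = c, d
    return best_class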