def s(x):
    """Predict with the stacked ensemble.

    Each base learner (logistic regression, SVM, naive Bayes) is asked for
    its two prediction vectors for ``x``.  The six vectors become the columns
    of one feature matrix, which is handed to the meta-model ``model``.

    Args:
        x: Input forwarded unchanged to every base learner's ``predict``.

    Returns:
        The result of ``model.predict`` on the stacked feature matrix.
    """
    lr_a, lr_b = logistic_regression.predict(x)
    svm_a, svm_b = SVM.predict(x)
    nb_a, nb_b = NaiveBayes.predict(x)

    # One column per base-learner output, in the order
    # logistic regression, SVM, naive Bayes.
    columns = [
        out.reshape(len(out), 1)
        for out in (lr_a, lr_b, svm_a, svm_b, nb_a, nb_b)
    ]
    stacked = np.concatenate(columns, axis=1)
    return model.predict(stacked)
def _prepend_bias(X):
    """Return the 2-D array ``X`` with a leading column of ones."""
    m, _ = X.shape
    return np.concatenate([np.ones(shape=(m, 1)), X], axis=1)


def run_logistic_regression(Train_X, Train_Y, Test_X, Test_Y):
    """Train, validate and report on the logistic-regression spam model.

    A column of ones is prepended to both feature matrices before they are
    passed to ``lr.train`` / ``lr.predict``.  Progress, timing, training and
    validation accuracy are printed, the cost history is plotted, and the
    highest-weighted features are listed.

    Args:
        Train_X: 2-D training feature array (without the bias column).
        Train_Y: Training labels, passed straight to ``lr.train``.
        Test_X: 2-D validation feature array (without the bias column).
        Test_Y: Validation labels, passed straight to ``lr.get_accuracy``.

    Returns:
        None.  All results are printed or plotted.
    """
    print('Logistic_Regression:')
    print('\nTraining begins...')
    Train_X = _prepend_bias(Train_X)

    start_time = time.time()
    theta, accuracy, J_history, it = lr.train(Train_X, Train_Y)
    end_time = time.time()

    print('iterations: {it}'.format(it=it))
    print('time_taken: {time} sec'.format(time=(end_time - start_time)))
    print('accuracy: {acc}'.format(acc=accuracy))
    plot_J_history(it, J_history, 'Logistic Regression')

    print('\nValidation on test data:')
    Test_X = _prepend_bias(Test_X)
    res = lr.predict(theta, Test_X)
    accuracy = lr.get_accuracy(res, Test_Y)
    print('validation accuracy: {acc}'.format(acc=accuracy))

    print('\nTop 30 Spam Predictors:')
    features = cd.get_features()
    # theta[0] belongs to the bias column added above, so it is skipped when
    # pairing weights with feature names.
    feature_w = zip(features, theta[1:, 0])
    sorted_feature_w = sorted(
        feature_w, key=lambda t: (t[1], t[0]), reverse=True)
    # Slicing (instead of indexing 0..29) avoids an IndexError when fewer
    # than 30 features are available.
    for name, weight in sorted_feature_w[:30]:
        print('%s\t\t%.2f' % (name, weight))
    print('\n\n')
def test_logreg_engines_reg():
    """Fit regularised logistic regression on ``data/ex2data2.txt``.

    Reads the three-column CSV (two inputs and a 0/1 label), expands the
    inputs with ``util.mapfeat``, minimises ``logreg.cost`` with
    ``scipy.optimize.fmin_tnc`` and prints the initial cost, gradient shape,
    fitted parameters, optimiser status, final cost and training accuracy
    before plotting the decision boundary.

    Returns:
        None.  Results are printed and plotted.
    """
    data = pd.read_csv('data/ex2data2.txt', header=None)

    # ``Series.reshape`` is gone from current pandas; reshape the underlying
    # ndarray instead (same result on old versions too).
    x1 = data[0].values.reshape(-1, 1)
    x2 = data[1].values.reshape(-1, 1)
    y = data[2].values.reshape(-1, 1)

    x = util.mapfeat(x1, x2)
    theta = np.zeros(x.shape[1])
    # Fourth argument of logreg.cost / logreg.gradient
    # (presumably the regularisation strength -- confirm in logreg).
    reg_lambda = 1.0

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print('initial cost %f' % logreg.cost(theta, x, y, reg_lambda))
    print(logreg.gradient(theta, x, y, reg_lambda).shape)

    param, neval, status = opt.fmin_tnc(
        func=logreg.cost, x0=theta, fprime=logreg.gradient,
        args=(x, y, reg_lambda))
    print(param)
    print('Neval %d status %d\n' % (neval, status))
    print('Cost: %f' % logreg.cost(param, x, y, reg_lambda))

    p = logreg.predict(param, x)
    # Compare predictions from the trained params with the original truth:
    # NOT(XOR) is true exactly where prediction and label agree.
    agreement = np.logical_not(np.logical_xor(p, data[2]))
    print('accuracy %f' % (np.sum(agreement) / float(y.size)))
    util.plot_dec_boundary(param, x, y, True)
def main_mz():
    """Train on ``record.csv``, save the model and list test-set mismatches.

    The weights returned by ``lr_train`` are written to ``model.txt`` via
    ``save_model``.  ``record_test.csv`` is then scored with ``predict`` and
    every row whose prediction differs from its label is printed as
    ``<row index> <row features from column 1 on> <prediction> <label>``.

    Returns:
        None.
    """
    dataMat, labelMat = load_data("record.csv")
    weights = lr_train(dataMat, labelMat)
    save_model('model.txt', weights)

    test_data, test_label = load_data("record_test.csv")
    ans = predict(test_data, weights.T)
    n = shape(ans)[0]
    for i in range(n):
        if ans[i][0] != test_label[i, 0]:
            # Formatted with %s so the output matches the former Python 2
            # ``print a, b, c, d`` statement while also running on Python 3.
            print('%s %s %s %s' % (
                i, test_data[i, 1:], ans[i, 0], test_label[i, 0]))
async def predict_results(req):
    """Serve a prediction for the values posted by the frontend.

    The request's JSON payload is read from ``req.json``; its ``'age'`` and
    ``'income'`` entries are passed to ``predict``.  The prediction is
    printed and sent back as a JSON response.

    Args:
        req: Incoming request object exposing the parsed body as ``.json``.

    Returns:
        ``response.json(prediction)``.
    """
    # Unpack the JSON body sent by the frontend (expected to be a dict).
    payload = req.json
    prediction = predict(payload['age'], payload['income'])
    # Other algorithms could be plugged in here, and their results compared.
    print('prediction says:', prediction)
    return response.json(prediction)
def predict_house(data, weights):
    """Pick the most likely Hogwarts house for every row of ``data``.

    One score column is computed per house with ``logreg.predict`` from the
    matching row of ``weights`` (after its first column is dropped); the
    house with the highest score in each row is the prediction.

    Args:
        data: DataFrame containing a ``'Hogwarts House'`` column; it is also
            fed to ``tools.generic_preprocess(data, 'mean')``.
        weights: DataFrame with one row of weights per house, in the order
            Gryffindor, Ravenclaw, Slytherin, Hufflepuff.

    Returns:
        The result of ``idxmax(axis=1)`` over the per-house score columns.
    """
    try:
        house_names = ['Gryffindor', 'Ravenclaw', 'Slytherin', 'Hufflepuff']
        _, X = tools.generic_preprocess(data, 'mean')
        weights = weights.drop(weights.columns[0], axis=1)
        # Start from the label column only to get a frame with data's index;
        # the column itself is dropped again once the scores are in place.
        students = data.loc[:, 'Hogwarts House'].to_frame()
        for row, house in enumerate(house_names):
            theta = np.array(weights.iloc[row:row + 1]).reshape(X.shape[1], 1)
            students[house] = logreg.predict(X, theta)
        students = students.drop(columns=['Hogwarts House'])
        predictions = students.idxmax(axis=1)
    except Exception:
        # NOTE(review): relies on tools.error_exit not returning; otherwise
        # ``predictions`` is unbound below -- confirm.
        tools.error_exit('Failed to predict houses.')
    return predictions
def test_logreg_accept_scores():
    """Fit logistic regression on ``data/ex2data1.txt`` and report results.

    The design matrix has four columns: a column of ones, the two input
    columns of the CSV, and the square of the first input.  ``logreg.cost``
    is minimised with ``scipy.optimize.fmin_tnc``; the initial cost, fitted
    parameters, optimiser status, final cost, the sigmoid output for three
    sample score pairs and the training accuracy are printed, then the
    decision boundary is plotted.

    Returns:
        None.  Results are printed and plotted.
    """
    data = pd.read_csv('data/ex2data1.txt', header=None)

    # ``Series.reshape`` is gone from current pandas; reshape the underlying
    # ndarray instead (same result on old versions too).
    x1 = data[0].values.reshape(-1, 1)
    x2 = data[1].values.reshape(-1, 1)
    x3 = x1 ** 2
    x0 = np.ones((data[0].size, 1))
    x = np.hstack([x0, x1, x2, x3])
    y = data[2].values.reshape(-1, 1)

    # Fourth argument of logreg.cost / logreg.gradient
    # (presumably the regularisation strength -- confirm in logreg).
    reg_lambda = 0.0025

    # theta must be a 1-D array to work as fmin_tnc's x0 parameter.
    theta = np.zeros(x[0].size)

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print('initial cost %f' % logreg.cost(theta, x, y, reg_lambda))

    param, neval, status = opt.fmin_tnc(
        func=logreg.cost, x0=theta, fprime=logreg.gradient,
        args=(x, y, reg_lambda), maxfun=1000)
    print(param)
    print('Neval %d status %d\n' % (neval, status))
    # The final cost is reported with the fourth argument set to 0.
    print('Cost: %f' % logreg.cost(param, x, y, 0))

    # Sigmoid output for three sample (input 1, input 2) pairs, using the
    # same feature layout as the design matrix above.
    for first, second in ((45, 85), (75.3, 46.3), (82.2, 41.9)):
        sample = np.array([1, first, second, first ** 2])
        print('COE: %f' % util.sigmoid(np.dot(sample, param.T)))

    p = logreg.predict(param, x)
    # Compare predictions from the trained params with the original truth:
    # NOT(XOR) is true exactly where prediction and label agree.
    agreement = np.logical_not(np.logical_xor(p, data[2]))
    print('accuracy %f' % (np.sum(agreement) / float(y.size)))
    util.plot_dec_boundary(param, x, y, False)


# NOTE(review): the original source line ended with a dangling ''' after this
# function; it is not reproduced because it would open an unterminated string.
def get_stats(examples, alt_examples, w):
    """Compute accuracy, precision and recall of ``predict`` on a data set.

    For every index ``i`` the true class is the last element of
    ``examples[i]`` and the confidence is ``predict(alt_examples[i], w)``;
    the predicted class is the rounded confidence.

    Args:
        examples: Indexable collection of rows whose last element is the
            true class (compared against 0 and 1).
        alt_examples: Indexable collection, aligned with ``examples``, of the
            inputs handed to ``predict``.
        w: Model parameters handed to ``predict``.

    Returns:
        Tuple ``(accuracy, precision, recall, confidences)`` where
        ``confidences`` is an ``(n, 2)`` array holding the true class in
        column 0 and the confidence in column 1.  A metric whose denominator
        is zero is reported as 0 (accuracy as 0.0 for empty ``examples``).
    """
    n_examples = np.size(examples, 0)
    TP = FP = TN = FN = 0
    confidences = np.zeros((n_examples, 2))

    for i in range(n_examples):
        # True class of the example.
        ex_out = examples[i][-1]
        # Predicted confidence and the class it rounds to.
        confidence = predict(alt_examples[i], w)
        out_pred = np.round(confidence)
        confidences[i][0] = ex_out
        confidences[i][1] = confidence

        if out_pred == 1:
            if ex_out == 1:
                TP += 1
            else:
                FP += 1
        else:
            if ex_out == 0:
                TN += 1
            else:
                FN += 1

    TOT = float(len(examples))
    # Guard against an empty data set instead of raising ZeroDivisionError.
    accuracy = float(TP + TN) / TOT if TOT else 0.0

    if TP == 0 and FP == 0:
        print('no positive classifications, cannot compute precision')
        precision = 0
    else:
        precision = float(TP) / float(TP + FP)

    if TP == 0 and FN == 0:
        print('could not compute recall')
        recall = 0
    else:
        recall = float(TP) / float(TP + FN)

    return accuracy, precision, recall, confidences
# Value passed to costFunctionReg as its last extra argument; also shown in
# the plot title.
lambda_ = 1

# Options handed to optimize.minimize.
options = dict(maxiter=100)

res = optimize.minimize(
    costFunctionReg,
    initial_theta,
    (X, y, lambda_),
    jac=True,
    method='TNC',
    options=options,
)

# ``fun`` on the OptimizeResult is the cost at the optimised theta ...
cost = res.fun

# ... and ``x`` is the optimised theta itself.
theta = res.x

# Decision boundary plot with axis labels, legend and title.
utils.plotDecisionBoundary(plot_data, theta, X, y)
pyplot.xlabel('Microchip Test 1')
pyplot.ylabel('Microchip Test 2')
pyplot.legend(['y = 1', 'y = 0'])
pyplot.grid(False)
pyplot.title('lambda = %0.2f' % lambda_)
pyplot.show()

# Accuracy on the training set.
p = predict(theta, X)
print('Train Accuracy: %.1f %%' % (np.mean(p == y) * 100))
print('Expected accuracy (with lambda = 1): 83.1 % (approx)\n')
import numpy as np
from load import load, load_feature
from logistic_regression import logistic_regression, predict

if __name__ == '__main__':
    # Train on the selected features, predict for the test set and write a
    # "PassengerId,Survived" CSV to data/ans.csv.
    features = ["Pclass", "Sex"]
    x, y = load(features, "train.csv")
    theta = np.zeros((x.shape[0], 1))
    theta = logistic_regression(x.transpose(), y, theta, 0.1, 500)

    # NOTE(review): load() is unpacked into (x, y) for the training file but
    # used as a single value here, and this path lacks the "data/" prefix
    # used for the ids below -- confirm against load.py.
    x_test = load(features, "test.csv")
    y_predict = predict(x_test, theta)
    ids = load_feature("PassengerId", "data/test.csv")

    # ``with`` closes the file even if a write fails.  Rows are paired
    # directly instead of assuming exactly 418 of them; this assumes ``ids``
    # iterates its values in positional order (list / array / default-index
    # Series).
    with open("data/ans.csv", 'w') as f:
        f.write("PassengerId,Survived\n")
        for passenger_id, row in zip(ids, y_predict):
            f.write("{!s},{!s}\n".format(passenger_id, row[0]))

    # f = open("features.txt", "a")
    # for i in range(len(features)):
    #     f.write(features[i] + ", ")
    # f.write("\t" + str(accuracy) + "\n")
cost = result.fun
# Show the minimum cost that scipy.optimize found within the configured
# number of iterations.
print('Cost at theta found by scipy.optimize.minimize: %f' % cost)

# Draw the decision boundary: evaluate the mapped features times theta on a
# 50 x 50 grid and contour the result.
plot_data(X_original, y, show=False)
u = np.linspace(-1, 1.5, 50)
v = np.linspace(-1, 1.5, 50)
z = np.zeros((u.size, v.size))
for i, u_i in enumerate(u):
    for j, v_j in enumerate(v):
        z[i, j] = map_feature(u_i, v_j).dot(theta)
plot.contour(u, v, z.T, 0)
plot.show()

# Print predicted probabilities for a few sample test-result pairs.  (Per the
# plot, results closer to the central 0 have a higher pass rate.)
test_data = [[0.1, -0.1], [-0.7, 0.2], [0.8, -0.1], [1.0, -1.0]]
for data in test_data:
    test1, test2 = data
    x = map_feature(np.array(test1), np.array(test2))
    prob = sigmoid(np.array(x).dot(theta))
    print('試験結果が{}と{}だったマイクロチップが品質保証に合格している確率は{}'.format(
        test1, test2, prob))

# Show the accuracy on the training data.
predictions = predict(theta, X)
accuracy = 100 * np.mean(predictions == y)
print('Train accuracy: %0.2f %%' % accuracy)
correctBayes += 1 if correctBayes > maxBayes: maxBayes = correctBayes maxHyperParametersIndexBayes = i if correctAdaboost > maxAdaboost: maxAdaboost = correctAdaboost maxHyperParametersIndexAdaboost = i print("Training logistic regression") for i in range(len(logregrHyperParameters)): print(i) correctlogregr = 0 w8z = logistic_regression.train(sortedIGs, trainMailsList, logregrHyperParameters[i]) for incoming in development_mails: if (logistic_regression.predict(w8z, incoming, sortedIGs) > 0.5) == incoming[-1]: correctlogregr += 1 if correctlogregr > maxlogregr: maxlogregr = correctlogregr maxHyperParametersIndexlogregr = i # now test and plot mypath = join("pu_corpora_public", "pu3", "part" + "10") test_mails = [] for file in listdir(mypath): with open(join(mypath, file), "r") as f: templist = [] for line in f: for word in line.split(): if word not in templist and word.isnumeric(): #len(word) > 1
import plot
import logistic_regression as lr

# Load the data set and draw a scatter plot of it.
data = lr.load_data('iris_data.csv')
plot.scatter_plot(data, ['Iris-setosa', 'Iris-versicolor'])

# Separate the data from the labels, then divide everything into a training
# set and a testing set.
X, y = lr.split(data)
X_train, X_test, y_train, y_test = lr.train_test_split(X, y)

# Run SGD on the training set to obtain theta.
theta = lr.SGD(X_train, y_train)
print('\nCalculated theta:\n {}'.format(theta))

# Test the model on the held-out set.
hypothesis = lr.predict(X_test, theta)
lr.accuracy(hypothesis, y_test)

# Plot the decision boundary for the fitted theta.
plot.boundary(data, ['Iris-setosa', 'Iris-versicolor'], theta)
print("Calculated GD = \n", grad)

# Cost and gradient at a non-zero theta.
test_theta = np.array([[-24], [0.2], [0.2]])
J = lr.compute_cost(test_theta, X1, y)
grad = lr.gradient_descent(test_theta, X1, y)
print('Cost at test theta: {:7.3f}'.format(J))  # expected: 0.218
# expected: [[0.043], [2.566], [2.647]]
print('Gradient at test theta: \n', grad)

# Before overlaying the decision boundary on the data, compute the optimised
# theta for the global minimum.
# expected: [[-25.161], [0.206], [0.201]]
theta = lr.optimizer_func(initial_theta, X1, y)
print('Computed theta: ', theta)
theta = np.vstack(theta)

# Now draw the decision boundary.
lr.decision_boundary(theta, X1, y)

# Sanity-check the model with one prediction: a student scoring 45 on exam 1
# and 85 on exam 2.  expected: 0.775 +/- 0.002
X_test = np.array([1, 45, 85])
prob = lr.sigmoid(X_test.dot(theta))
print("Probability of student with scores {} getting admitted = {}".format(
    X_test[[1, 2]], prob))

# Overall accuracy of the model.  expected: 89.0
p = lr.predict(theta, X1)
accuracy = np.sum(np.equal(p, y)) / m
print("Accuracy of the model = {:7.3f}%".format(accuracy * 100))