def evaluate(func, timeout=30, start=0, end=6, step=10):
    """Time a weight-fitting function on synthetic data of growing size.

    For every exponent ``i`` in ``range(start, end)`` a dataset with
    ``100 * step ** i`` rows and three random columns is generated, up to
    five times (NumPy seeds 0..4).  The target is a fixed combination of
    linear and squared terms of those columns.  Each repeat times two calls
    to ``func`` and checks the resulting RMSE; a summary line is printed per
    dataset size.

    Args:
        func: Callable used as ``func(dataset, actual, max_iter=0)`` to get
            the starting weights and as
            ``func(dataset, actual, start_weights)`` to get the final ones.
        timeout: Time budget in seconds, measured from the start of this
            call.  It is checked after each repeat and after each dataset
            size; work already started is never interrupted.
        start: First exponent (inclusive).
        end: Last exponent (exclusive).
        step: Base of the growth factor for the number of observations.

    Raises:
        Exception: If the final error is larger than the starting error, or
            if the final error exceeds 0.9.
    """
    start_time = time.time()
    for i in range(start, end):
        observations = 100 * step ** i
        times = []
        for j in range(5):
            # Seeding per repeat makes every run see the same datasets.
            np.random.seed(j)
            dataset = np.random.random((observations, 3))
            actual = (10 * dataset[:, 0] + 5 * dataset[:, 1]
                      + 2 * dataset[:, 2]
                      + dataset[:, 0] ** 2 + 2 * dataset[:, 1] ** 2)

            # The timed section covers both calls to ``func``.
            iter_start = time.time()
            start_weights = func(dataset, actual, max_iter=0)
            end_weights = func(dataset, actual, start_weights)
            iter_end = time.time()

            start_error = RMSE.rmse(
                actual, linear_regression.predict(dataset, start_weights))
            end_error = RMSE.rmse(
                actual, linear_regression.predict(dataset, end_weights))
            if end_error > start_error:
                raise Exception('Error has increased %f -> %f'
                                % (start_error, end_error))
            if end_error > 0.9:
                raise Exception('Error is unusually high %f' % end_error)

            times.append(iter_end - iter_start)
            if time.time() > start_time + timeout:
                break

        # A single parenthesised argument prints identically under
        # Python 2 and Python 3, unlike the bare ``print`` statement.
        print('For %010d observations took %f seconds (av. from %d repeats)'
              % (observations, np.array(times).mean(), len(times)))
        print('Last set of weights were %s and error went from %f to %f'
              % (str(end_weights), start_error, end_error))
        if time.time() > start_time + timeout:
            break
def predict_house_price(x, mu, sigma, theta):
    """Normalise ``x``, add an intercept column and call ``linear.predict``.

    Args:
        x: Two-dimensional feature matrix (one row per sample).
        mu: Object exposing ``.values``; subtracted from ``x``.
        sigma: Object exposing ``.values``; ``x - mu`` is divided by it.
        theta: Parameters forwarded unchanged to ``linear.predict``.

    Returns:
        Whatever ``linear.predict`` returns for the prepared matrix.
    """
    # Re-use the previously obtained normalisation statistics.
    scaled = (x - mu.values) / sigma.values
    # The intercept term is a column of ones placed first.
    intercept = np.ones((scaled.shape[0], 1))
    design = np.concatenate((intercept, scaled), axis=1)
    return linear.predict(design, theta)
def cross_validation(k, train_data, feature_names, classifier):
    """Run k-fold cross-validation and return the absolute mean accuracy.

    ``train_data`` is modified in place: the matching entry of
    ``feature_names`` is appended to every row and the rows are shuffled.
    Because the label is appended before splitting, the rows handed to the
    classifiers still carry it in their last position.

    Args:
        k: Number of folds.
        train_data: List of mutable rows (each row must support ``append``).
        feature_names: Label for each row of ``train_data``, same order.
        classifier: ``1`` for ``linear_regression.predict``, ``2`` for
            ``centroid_classifier.predict``, ``3`` for ``kNN.getknnFit``.

    Returns:
        Absolute value of the accuracy averaged over the ``k`` folds.

    Raises:
        ValueError: If ``classifier`` is not 1, 2 or 3.  Raised before
            ``train_data`` is touched.
    """
    if classifier not in (1, 2, 3):
        raise ValueError("Unknown classifier id: %r" % (classifier,))

    # Attach labels first so shuffling keeps each row with its label.
    for index, item in enumerate(train_data):
        item.append(feature_names[index])
    random.shuffle(train_data)

    k_splits = np.array_split(train_data, k)
    feature_splits = [[row[-1] for row in split] for split in k_splits]

    all_accuracy = 0
    # A separate loop name keeps the fold count ``k`` intact for averaging.
    for fold in range(k):
        print("For %s fold" % (fold + 1))
        testX = k_splits[fold]
        testY = feature_splits[fold]

        # Everything except the current fold is training data.
        other_splits = k_splits[:fold] + k_splits[fold + 1:]
        other_labels = feature_splits[:fold] + feature_splits[fold + 1:]
        trainX = [row for split in other_splits for row in split]
        trainY = [label for labels in other_labels for label in labels]

        if classifier == 1:
            _matrix, accuracy = linear_regression.predict(
                trainX, trainY, testX, testY)
        elif classifier == 2:
            accuracy = centroid_classifier.predict(
                trainX, trainY, testX, testY, 4)
        else:
            accuracy = kNN.getknnFit(trainX, testX, 4)

        print(abs(accuracy))
        all_accuracy += accuracy

    # Average over the actual number of folds, not a fixed 5.
    return abs(float(all_accuracy) / k)
def test_synthetic(self):
    """Check that ``self._common`` recovers known coefficients.

    Targets are produced by ``predict`` from random two-feature samples and
    the reference coefficients; the values returned by ``self._common`` must
    equal the reference after rounding to one decimal place.
    """
    n_features = 2
    n_samples = 100
    expected = [3.2, 5.5, 4.3]

    samples = []
    for _ in range(n_samples):
        samples.append([random.random() for _ in range(n_features)])

    targets = predict(prepend_x0(samples), expected)
    fitted = self._common(samples, targets)
    rounded = [round(coef, 1) for coef in fitted]
    self.assertListEqual(expected, rounded)
def optimize(dataset, actual, weights=None, max_iter=1000, step_size=0.1):
    """Fit weights by batch gradient descent on the prediction residuals.

    Each iteration computes ``lr.predict(dataset, weights) - actual``,
    accumulates the residual-weighted sum of every column, and moves each
    weight against that sum scaled by ``step_size / len(dataset)``.

    Args:
        dataset: Two-dimensional array of observations (rows) by features.
        actual: Target value for each observation.
        weights: Starting weights.  When ``None``, random weights of length
            ``dataset.shape[1]`` are drawn with ``np.random.random``.
        max_iter: Number of gradient steps; ``0`` returns the starting
            weights untouched.
        step_size: Learning rate.

    Returns:
        The starting weights object itself when ``max_iter`` is 0,
        otherwise a list holding the updated weights.
    """
    # ``is None`` is required: ``ndarray == None`` compares element-wise and
    # cannot be used as a condition when array weights are passed in.
    if weights is None:
        weights = np.random.random(dataset.shape[1])

    for _ in range(max_iter):
        scale = step_size / len(dataset)
        residuals = [predicted - target for predicted, target
                     in zip(lr.predict(dataset, weights), actual)]

        gradient = [0] * len(weights)
        for obs, residual in zip(dataset, residuals):
            for col, value in enumerate(obs):
                gradient[col] += value * residual

        weights = [w - scale * g for w, g in zip(weights, gradient)]
    return weights
def predict(input):
    """Predict three measurements for a date from per-series linear fits.

    Every entry of ``data_lines`` is split on ``';'`` into a date and three
    readings (named temp, hum and wind here).  A reading that is ``""`` or
    ``" "`` is replaced by the previous value of the same series.  One
    ``lr.get_coefs`` fit is made per series and evaluated at the requested
    date.

    Args:
        input: Date string.  Its first four characters are replaced by the
            value of ``get_year()`` before it is converted with ``get_x``.

    Returns:
        Tuple ``(temp_predict, umid_predict, vant_predict)``.

    Raises:
        IndexError: If the very first line has a blank reading, because
            there is no earlier value to carry forward.
    """
    # Blank readings must be detected by value; ``is`` tests object identity
    # and is not a reliable way to compare strings.
    blank = ("", " ")

    x = []
    y0 = []
    y1 = []
    y2 = []
    year = get_year()
    for line in data_lines:
        date, temp, hum, wind = line.split(';')
        x.append(get_x(date))

        if temp in blank:
            temp = y0[-1]
        y0.append(float(temp))

        if hum in blank:
            hum = y1[-1]
        y1.append(float(hum))

        # The last field still carries the line terminator.
        wind = wind.split('\n')[0]
        if wind in blank:
            wind = y2[-1]
        y2.append(float(wind))

    target = get_x(year + input[4:])

    # Each fit receives its own copy of the x values, as the original did;
    # presumably lr.get_coefs modifies its first argument - confirm.
    b0, b1_temp = lr.get_coefs(list(x), y0)
    temp_predict = lr.predict(b0, b1_temp, target)

    b0, b1_umid = lr.get_coefs(list(x), y1)
    umid_predict = lr.predict(b0, b1_umid, target)

    b0, b1_vant = lr.get_coefs(list(x), y2)
    vant_predict = lr.predict(b0, b1_vant, target)

    return temp_predict, umid_predict, vant_predict
import numpy as np
import linear_regression

# Load the two data columns from the text file; the header row is skipped
# and unpack=True yields one array per column.
columns = np.loadtxt("pizza.txt", skiprows=1, unpack=True)
X, Y = columns

# Fit the model: 10000 iterations with a learning rate of 0.00001.
w, b = linear_regression.train(X, Y, iterations=10000, lr=0.00001)
summary = "\nw=%.3f, b=%.3f" % (w, b)
print(summary)

# Report the model's output for x = 20.
forecast = linear_regression.predict(20, w, b)
print("Prediction: x=%d => y=%.2f" % (20, forecast))
def predict_profit(population, params, print_msg=True):
    """Return ``linear.predict`` for a single population figure.

    The model input is one row ``[1, population / 10000]``.

    Args:
        population: Population count.
        params: Parameters forwarded to ``linear.predict``.
        print_msg: When true, print the prediction multiplied by 10000.

    Returns:
        The value returned by ``linear.predict`` (not multiplied).
    """
    features = np.array([[1, population / 10000]])
    prediction = linear.predict(features, params)
    if not print_msg:
        return prediction

    message = 'For a population of {0}, we predict a profit of {1}'.format(
        population, prediction * 10000)
    print(message)
    return prediction
# L2-normalise the training features.
train_data = proc.normalize_l2(train_data)

# K-Fold training.
models = linear.kfold(model_params, train_data, train_labels, n_folds,
                      verbose, generate_graphs)

# Pick the model to keep from the K-Fold results.
# NOTE(review): the selection uses argmax; if column 1 holds a mean squared
# error, the smallest value would be expected to win - confirm.
best_index = models[:, 1][0].argmax()
best_model = models[best_index]

if generate_graphs:
    # Learning-curve plot is currently disabled:
    # graphs.plot_learning_curve(best_model[0].steps[1][1], "TESTE", train_data, train_labels)

    # Cost versus iteration number, counting iterations from 1.
    costs = best_model[2]
    iterations = np.arange(1, costs.shape[0] + 1)
    graphs.line_plot("CostXInteractions", "Custo vs Iterações",
                     "Iterações", "Custo", iterations, costs)

# Load the test set: first column is the label, the rest are features.
test_file = np.loadtxt('../dataset/year-prediction-msd-test.txt',
                       delimiter=',')
test_labels, test_data = test_file[:, 0], test_file[:, 1:]

# Apply the same normalisation as for the training data.
test_data = proc.normalize_l2(test_data)

# Evaluate the selected model on the test set.
print("Results on Test dataset")
linear.predict(best_model[0], test_data, test_labels, verbose)
# Stage the test data in a temp file, run the model on the selected columns
# and upload one prediction per line to the 'predictions' bucket.
print('creating tmp directory')
# exist_ok avoids a FileExistsError when the directory is left over from an
# earlier run.
os.makedirs('tmp', exist_ok=True)
print('done creating temp file ' + str(tempfile) + ', writing data to it')
with open(tempfile, 'w') as w:
    w.write(test_data)
print('done writing to tempfile, creating pandas dataframe')
df = pd.read_csv(tempfile)
print('done creating a pandas dataframe')

# Only these columns are passed to the model.
X = df[[
    'Avg. Area Income',
    'Avg. Area House Age',
    'Avg. Area Number of Rooms',
    'Avg. Area Number of Bedrooms',
    'Area Population',
]]
print('running the model against test inputs')
predictions = linear_regression.predict(X)

print('writing predictions to the s3 bucket')
# File name: current time of day + '_' + test file base name + '.txt'.
predictions_file_name = (
    str(datetime.datetime.now().time())
    + '_' + test_file_path.split('/')[-1].split('.')[0] + '.txt'
)
predictions_file_path = predictions_file_name
with open(predictions_file_path, 'w') as f:
    # Each prediction is indexable; only its first element is written.
    f.writelines(str(prediction[0]) + '\n' for prediction in predictions)
s3.write_file_to_bucket('predictions', predictions_file_name,
                        predictions_file_path)
print('done writing predictions to the s3 bucket')
predictions, it should contain a vector of the same shape as the observed values. The first 13 columns of the dataset are the independent variables and the last column is the dependent variable. Expected outputs of the model are the optimal coefficients, and a plot of the ground truth versus the predictions. UPDATE MODEL DETERMINING WHICH COEF IS THE BEST, IE LOOP THROUGH DATASET AND KEEP THE DATA WITH THE LEAST ERROR''' independent_var = regtrain.iloc[0:206, 0:13] dependent_var = regtrain.iloc[0:206, -1] observed_values = regtest.iloc[:, -1] test_inputs = regtest.iloc[:, 0:13] linearModel = linear_regression.linear_model( independent_var, dependent_var, linear_regression.linear_prediction) predictions = linear_regression.predict(linearModel, test_inputs) print(np.mean(linearModel.intercept)) LR_predictions = linear_regression.Linear_Regression( test_inputs, observed_values) display_predictions = predictions.T LR = plt.plot(observed_values, predictions.T, 'bo') plt.show() ax = sns.heatmap(linearModel.slope) plt.show() print(np.sum(linearModel.intercept)) '''For part two of this assignment, we simply use a ridge regression model to test the outputs of various thresholds and create an ROC curve for each threshold to see how the model performs. The expected output is to see the sparsity of the output matrix minimized, but as the threshold increases toward infinity, you can expect the values to be zeroed out OUTPUTS: TPR/FPR, ROC for each threshold'''
# Run the three classifiers on the same split and print each accuracy,
# with a banner line between sections.
separator = "############################################################################"

print(separator)
print("KNN method: ")
kn_acc = kNN.getknnFit(trainX, testX, 4)
acu_knn = [kn_acc]
print("KNN Method Accuracy %s" % kn_acc)

print(separator)
print("SVM method: ")
svm_acc = SVM.kfold_SVM(trainX, trainY, testX, testY)
acu_svm = [svm_acc]
# Only the SVM figure is multiplied by 100 before printing.
print("Accuracy for SVM Method %s" % (svm_acc * 100))

print(separator)
print("Linear Regression Method: ")
result, lin_reg_acc = linear_regression.predict(trainX, trainY, testX, testY)
acu_lin_reg = [lin_reg_acc]
print("Linear Regression accuracy %s" % lin_reg_acc)
print(separator)
def test_predict():
    """``predict(w, X)`` must give 2.1, 1.5 and 0.9 for the three rows.

    The expected values equal the dot product of each row with ``w``.
    """
    features = np.array([[1.0, 2.0], [1.0, 0.0], [1.0, -2.0]])
    weights = np.array([1.5, 0.3])
    expected = np.array([2.1, 1.5, 0.9])

    result = predict(weights, features)
    assert_array_almost_equal(result, expected)