Exemplo n.º 1
0
def evaluate(func, timeout=30, start=0, end=6, step=10):
    """Time a weight-fitting function on synthetic datasets of growing size.

    For every exponent ``i`` in ``range(start, end)`` a dataset with
    ``100 * step ** i`` rows and 3 columns is generated up to five times
    (NumPy seeds 0..4).  ``func`` is first called with ``max_iter=0`` to get
    the starting weights and then called again with those weights; only these
    two calls are timed.  The RMSE before and after is computed with
    ``RMSE.rmse`` and ``linear_regression.predict`` and sanity-checked.

    Args:
        func: callable used as ``func(dataset, actual, max_iter=0)`` and
            ``func(dataset, actual, start_weights)``; must return weights.
        timeout: wall-clock budget in seconds.  It is checked after each
            repeat and again after each dataset size, so the run stops at
            the first check past the deadline.
        start: first exponent (inclusive).
        end: last exponent (exclusive).
        step: base of the exponential growth of the dataset size.

    Raises:
        Exception: if the error after fitting is larger than before, or if
            the final error is above 0.9.
    """
    deadline = time.time() + timeout
    for i in range(start, end):
        observations = 100 * step ** i
        times = []
        for j in range(5):
            # Seeding with the repeat index makes every run reproducible.
            np.random.seed(j)
            dataset = np.random.random((observations, 3))
            actual = (10 * dataset[:, 0] + 5 * dataset[:, 1]
                      + 2 * dataset[:, 2]
                      + dataset[:, 0] ** 2 + 2 * dataset[:, 1] ** 2)

            iter_start = time.time()
            start_weights = func(dataset, actual, max_iter=0)
            end_weights = func(dataset, actual, start_weights)
            iter_end = time.time()

            start_error = RMSE.rmse(
                actual, linear_regression.predict(dataset, start_weights))
            end_error = RMSE.rmse(
                actual, linear_regression.predict(dataset, end_weights))
            if end_error > start_error:
                raise Exception('Error has increased %f -> %f'
                                % (start_error, end_error))
            if end_error > 0.9:
                raise Exception('Error is unusually high %f' % end_error)

            times.append(iter_end - iter_start)
            if time.time() > deadline:
                break

        # A single pre-formatted argument keeps the output identical whether
        # ``print`` is the Python 2 statement or the Python 3 function.
        print('For %010d observations took %f seconds (av. from %d repeats)'
              % (observations, np.array(times).mean(), len(times)))
        print('Last set of weights were %s and error went from %f to %f'
              % (str(end_weights), start_error, end_error))
        if time.time() > deadline:
            break
Exemplo n.º 2
0
def predict_house_price(x, mu, sigma, theta):
    """Normalise ``x`` and return ``linear.predict`` for it.

    ``x`` is scaled as ``(x - mu.values) / sigma.values``, a column of ones
    (the intercept term) is placed in front of the scaled columns, and the
    resulting matrix is passed to ``linear.predict`` together with ``theta``.
    """
    scaled = (x - mu.values) / sigma.values
    intercept_column = np.ones((scaled.shape[0], 1))
    design_matrix = np.concatenate((intercept_column, scaled), axis=1)
    return linear.predict(design_matrix, theta)
Exemplo n.º 3
0
def cross_validation(k, train_data, feature_names, classifier):
    """Run k-fold cross-validation and return the mean absolute accuracy.

    Each row of ``train_data`` gets its entry from ``feature_names``
    appended as a last element, the rows are shuffled, and the result is cut
    into ``k`` parts with ``np.array_split``.  Every part is used once as the
    test set while the remaining parts form the training set.

    NOTE: ``train_data`` is modified in place (rows are extended and
    shuffled), exactly as before.

    Args:
        k: number of folds.
        train_data: list of row lists.
        feature_names: one value per row of ``train_data``; it is used as the
            Y vector handed to the classifiers.
        classifier: 1 for ``linear_regression.predict``, 2 for
            ``centroid_classifier.predict`` or 3 for ``kNN.getknnFit``.

    Returns:
        The absolute value of the accuracy averaged over the ``k`` folds.

    Raises:
        ValueError: if ``classifier`` is not 1, 2 or 3.  This is raised
            before ``train_data`` is touched; previously it surfaced as a
            NameError halfway through the first fold.
    """
    if classifier not in (1, 2, 3):
        raise ValueError(
            "classifier must be 1, 2 or 3, got %r" % (classifier,))

    for index, item in enumerate(train_data):
        item.append(feature_names[index])
    random.shuffle(train_data)

    n_folds = k
    k_splits = np.array_split(train_data, n_folds)
    feature_splits = [[row[-1] for row in part] for part in k_splits]

    all_accuracy = 0
    # A separate loop name keeps the ``k`` parameter intact for the average.
    for fold in range(n_folds):
        print("For %s fold" % (int(fold) + 1))
        testX = k_splits[fold]
        testY = feature_splits[fold]

        trainX = []
        trainY = []
        other_x = k_splits[:fold] + k_splits[(fold + 1):]
        other_y = feature_splits[:fold] + feature_splits[(fold + 1):]
        for part_x, part_y in zip(other_x, other_y):
            trainX.extend(part_x)
            trainY.extend(part_y)

        if classifier == 1:
            matrix, accuracy = linear_regression.predict(
                trainX, trainY, testX, testY)
        elif classifier == 2:
            accuracy = centroid_classifier.predict(
                trainX, trainY, testX, testY, 4)
        else:
            accuracy = kNN.getknnFit(trainX, testX, 4)

        print(abs(accuracy))
        all_accuracy += accuracy

    # Average over the folds actually run (this used to be a hard-coded 5).
    k_accuracy = float(all_accuracy) / n_folds
    return abs(k_accuracy)
Exemplo n.º 4
0
 def test_synthetic(self):
     """Fit synthetic data and expect the generating weights to come back.

     100 samples of 2 random values are drawn, targets are computed with
     ``predict(prepend_x0(samples), expected)``, and the weights returned by
     ``self._common`` are compared with ``expected`` after rounding to one
     decimal place.
     """
     expected = [3.2, 5.5, 4.3]
     n_features, n_samples = 2, 100
     samples = []
     for _ in range(n_samples):
         samples.append([random.random() for _ in range(n_features)])
     targets = predict(prepend_x0(samples), expected)
     fitted = self._common(samples, targets)
     self.assertListEqual(expected, [round(value, 1) for value in fitted])
Exemplo n.º 5
0
 def test_synthetic(self):
     """Check that fitting recovers the weights used to build the targets.

     The inputs are 100 rows of 2 values from ``random.random()``; the
     targets come from ``predict`` applied to ``prepend_x0`` of those rows
     with the reference weights.  The result of ``self._common`` is rounded
     to one decimal before the list comparison.
     """
     reference_weights = [3.2, 5.5, 4.3]
     column_count = 2
     row_count = 100
     rows = []
     for _ in range(row_count):
         rows.append([random.random() for _ in range(column_count)])
     targets = predict(prepend_x0(rows), reference_weights)
     rounded = [round(weight, 1) for weight in self._common(rows, targets)]
     self.assertListEqual(reference_weights, rounded)
Exemplo n.º 6
0
def optimize(dataset, actual, weights=None, max_iter=1000, step_size=0.1):
    """Refine linear weights with repeated gradient steps.

    Each iteration takes the differences between ``lr.predict(dataset,
    weights)`` and ``actual``, sums ``value * difference`` per column over
    all rows, and moves every weight against that sum, scaled by
    ``step_size / len(dataset)``.

    Args:
        dataset: 2-D data; rows are observations.  Only ``shape[1]`` is read
            when ``weights`` is None.
        actual: target value for each row of ``dataset``.
        weights: starting weights.  When None, random weights are drawn with
            ``np.random.random(dataset.shape[1])``.
        max_iter: number of update iterations; 0 returns the starting
            weights unchanged.
        step_size: learning rate.

    Returns:
        The starting weights as given/drawn when ``max_iter`` is 0,
        otherwise a list holding the updated weights.
    """
    # ``is None`` rather than ``== None``: for a NumPy array ``==`` compares
    # element-wise and the resulting array cannot be used as an ``if`` test.
    if weights is None:
        weights = np.random.random(dataset.shape[1])

    for _ in range(max_iter):
        predictions = lr.predict(dataset, weights)
        difference = [pred - target
                      for pred, target in zip(predictions, actual)]

        weights_change = [0] * len(weights)
        for obs, diff in zip(dataset, difference):
            for column, value in enumerate(obs):
                weights_change[column] += value * diff

        scale = step_size / len(dataset)
        weights = [w - scale * wc for w, wc in zip(weights, weights_change)]
    return weights
Exemplo n.º 7
0
def _float_or_previous(raw, history):
    """Return ``raw`` as a float, or the last entry of ``history`` if blank.

    A field counts as blank when it equals ``""`` or ``" "``.  A blank field
    with an empty ``history`` raises IndexError, as it did before.
    """
    if raw in ("", " "):
        raw = history[-1]
    return float(raw)


def predict(input):
    """Predict temperature, humidity and wind for the given date.

    Every entry of ``data_lines`` is split on ``';'`` into date, temperature,
    humidity and wind.  Dates are converted with ``get_x``; a blank reading
    is replaced by the previous reading of the same series.  One regression
    per series is fitted with ``lr.get_coefs`` and evaluated with
    ``lr.predict`` at the requested date.

    Args:
        input: the date to predict for.  Its first four characters are
            replaced by the result of ``get_year()`` before conversion with
            ``get_x`` (presumably a date string starting with a 4-digit
            year; confirm against the caller).

    Returns:
        Tuple ``(temp_predict, umid_predict, vant_predict)``.
    """
    x = []
    y0 = []
    y1 = []
    y2 = []
    year = get_year()
    for line in data_lines:
        date, temp, hum, wind = line.split(';')
        x.append(get_x(date))
        # Compare by value; ``is ""`` only worked through string interning.
        y0.append(_float_or_previous(temp, y0))
        y1.append(_float_or_previous(hum, y1))
        # The wind field is last on the line, so drop what follows '\n'.
        y2.append(_float_or_previous(wind.split('\n')[0], y2))

    date = get_x(year + input[4:])

    # Each fit gets its own copy of the x values, as the original did
    # (presumably because lr.get_coefs may modify its argument; confirm).
    b0, b1_temp = lr.get_coefs(x.copy(), y0)
    temp_predict = lr.predict(b0, b1_temp, date)

    b0, b1_umid = lr.get_coefs(x.copy(), y1)
    umid_predict = lr.predict(b0, b1_umid, date)

    b0, b1_vant = lr.get_coefs(x.copy(), y2)
    vant_predict = lr.predict(b0, b1_vant, date)

    return temp_predict, umid_predict, vant_predict
import numpy as np
import linear_regression

# Load the two columns of pizza.txt (header row skipped) into X and Y
X, Y = np.loadtxt("pizza.txt", skiprows=1, unpack=True)

# Fit the model: 10000 iterations with a learning rate of 0.00001
w, b = linear_regression.train(X, Y, iterations=10000, lr=0.00001)
print("\nw=%.3f, b=%.3f" % (w, b))

# Report the model's prediction for x = 20
x_query = 20
y_query = linear_regression.predict(x_query, w, b)
print("Prediction: x=%d => y=%.2f" % (x_query, y_query))
Exemplo n.º 9
0
def predict_profit(population, params, print_msg=True):
    """Return ``linear.predict`` for one population value.

    The model input is the single row ``[1, population / 10000]``.  When
    ``print_msg`` is true, a message with the population and the prediction
    multiplied by 10000 is printed.  The unscaled prediction is returned.
    """
    features = np.array([[1, population / 10000]])
    prediction = linear.predict(features, params)
    if not print_msg:
        return prediction
    message = 'For a population of {0}, we predict a profit of {1}'.format(
        population, prediction * 10000)
    print(message)
    return prediction
Exemplo n.º 10
0
# Pre-process the training features with proc.normalize_l2
train_data = proc.normalize_l2(train_data)

# Train with K-Fold; `models` holds the candidates returned by linear.kfold
models = linear.kfold(model_params, train_data, train_labels, n_folds,
                      verbose, generate_graphs)

# Pick the model used for the rest of the script.
# NOTE(review): the original comment says the best model is chosen by mean
# squared error, yet argmax selects the largest value, and `models[:, 1][0]`
# reads only the first entry of column 1 - confirm this is intended.
best_index = models[:, 1][0].argmax()
best_model = models[best_index]

if generate_graphs:
    # learning curve
    #graphs.plot_learning_curve(best_model[0].steps[1][1], "TESTE", train_data, train_labels)

    # Cost of the selected model against the 1-based iteration number
    costs = best_model[2]
    iterations = np.arange(1, costs.shape[0] + 1)
    graphs.line_plot("CostXInteractions", "Custo vs Iterações", "Iterações",
                     "Custo", iterations, costs)

# Load the test file; column 0 holds the labels, the remaining columns the data
test_file = np.loadtxt('../dataset/year-prediction-msd-test.txt',
                       delimiter=',')
test_labels, test_data = test_file[:, 0], test_file[:, 1:]

# Apply the same pre-processing as for the training data
test_data = proc.normalize_l2(test_data)

# Evaluate the selected model on the test set
print("Results on Test dataset")
linear.predict(best_model[0], test_data, test_labels, verbose)
Exemplo n.º 11
0
    print('creating tmp directory')
    os.mkdir('tmp')
# Write the raw test data to the temp file so pandas can parse it as CSV
print('done creating temp file ' + str(tempfile) + ', writing data to it')
with open(tempfile, 'w') as w:
    w.write(test_data)
print('done writing to tempfile, creating pandas dataframe')

df = pd.read_csv(tempfile)
print('done creating a pandas dataframe')

# Columns handed to the model, in this order
feature_columns = [
    'Avg. Area Income',
    'Avg. Area House Age',
    'Avg. Area Number of Rooms',
    'Avg. Area Number of Bedrooms',
    'Area Population',
]
X = df[feature_columns]

print('running the model against test inputs')
predictions = linear_regression.predict(X)

print('writing predictions to the s3 bucket')
# File name: "<current time>_<test file name up to its first dot>.txt"
time_stamp = str(datetime.datetime.now().time())
test_file_stem = test_file_path.split('/')[-1].split('.')[0]
predictions_file_name = time_stamp + '_' + test_file_stem + '.txt'
predictions_file_path = predictions_file_name

# One prediction per line; only the first element of each prediction is kept
with open(predictions_file_path, 'w') as f:
    for prediction in predictions:
        line = str(prediction[0]) + '\n'
        f.write(line)

s3.write_file_to_bucket(
    'predictions', predictions_file_name, predictions_file_path)
print('done writing predictions to the s3 bucket')
Exemplo n.º 12
0
 predictions, it should contain a vector of the same shape as the observed values.
 The first 13 columns of the dataset are the independent variables and the last column is the dependent
 variable.
 
 Expected outputs of the model are the optimal coefficients, and a plot of the ground truth
 versus the predictions.
 
 UPDATE MODEL DETERMINING WHICH COEF IS THE BEST, IE LOOP THROUGH DATASET AND KEEP THE DATA WITH THE
 LEAST ERROR'''
 # Training split: rows 0..205, first 13 columns as inputs, last as target
 independent_var = regtrain.iloc[:206, :13]
 dependent_var = regtrain.iloc[:206, -1]
 # Test split: every row, same column layout
 test_inputs = regtest.iloc[:, :13]
 observed_values = regtest.iloc[:, -1]

 # Fit the model and predict on the test inputs
 linearModel = linear_regression.linear_model(
     independent_var,
     dependent_var,
     linear_regression.linear_prediction,
 )
 predictions = linear_regression.predict(linearModel, test_inputs)
 print(np.mean(linearModel.intercept))
 LR_predictions = linear_regression.Linear_Regression(
     test_inputs,
     observed_values,
 )

 # Observed values against the transposed predictions, as blue dots
 display_predictions = predictions.T
 LR = plt.plot(observed_values, display_predictions, 'bo')
 plt.show()

 # Heatmap of the fitted slope values
 ax = sns.heatmap(linearModel.slope)
 plt.show()
 print(np.sum(linearModel.intercept))
 '''For part two of this assignment, we simply use a ridge regression model to test the outputs
 of various thresholds and create an ROC curve for each threshold to see how the model performs.
 The expected output is to see the sparsity of the output matrix minimized, but as the threshold increases
 toward infinity, you can expect the values to be zeroed out.
 
 OUTPUTS: TPR/FPR, ROC for each threshold'''
Exemplo n.º 13
0
# Separator line printed before and after each method's results
separator = (
    "############################################################################"
)

# k-nearest neighbours
print(separator)
print("KNN method: ")
kn_acc = kNN.getknnFit(trainX, testX, 4)
acu_knn = [kn_acc]
print("KNN Method Accuracy %s" % kn_acc)

# Support vector machine; its accuracy is printed multiplied by 100
print(separator)
print("SVM method: ")
svm_acc = SVM.kfold_SVM(trainX, trainY, testX, testY)
acu_svm = [svm_acc]
print("Accuracy for SVM Method %s" % (svm_acc * 100))

# Linear regression; predict returns a (result, accuracy) pair
print(separator)
print("Linear Regression Method: ")
result, lin_reg_acc = linear_regression.predict(trainX, trainY, testX, testY)
acu_lin_reg = [lin_reg_acc]
print("Linear Regression accuracy %s" % lin_reg_acc)
print(separator)
Exemplo n.º 14
0
def test_predict():
    """Check ``predict(w, X)`` on three rows against known outputs.

    The expected values equal each row of the input multiplied element-wise
    by the weights and summed (e.g. 1.5 * 1.0 + 0.3 * 2.0 = 2.1).
    """
    weights = np.array([1.5, 0.3])
    features = np.array([
        [1.0, 2.0],
        [1.0, 0.0],
        [1.0, -2.0],
    ])
    expected = np.array([2.1, 1.5, 0.9])
    assert_array_almost_equal(predict(weights, features), expected)