import numpy as np
import matplotlib.pyplot as plt

from income.setup import setup

# Closed-form least-squares fit of a straight line Y ~ a * X + b, using the
# "avg_dep" column returned by setup() as the only predictor.
X, Y = setup()
X = X["avg_dep"]

# Sums and means shared by the slope and intercept formulas below.
x_mean = X.mean()
y_mean = Y.mean()
x_sum = X.sum()
xx = X.dot(X)
xy = X.dot(Y)

# Slope (a) and intercept (b) of the fitted line.
denominator = xx - x_mean * x_sum
a = (xy - y_mean * x_sum) / denominator
b = (y_mean * xx - x_mean * xy) / denominator

# Fitted values, drawn on top of the raw observations.
Yhat = a * X + b
plt.scatter(X, Y)
plt.plot(X, Yhat)
plt.show()

# R^2 = 1 - (residual sum of squares) / (total sum of squares about the mean).
residual = Y - Yhat
centered = Y - y_mean
r2 = 1 - residual.dot(residual) / centered.dot(centered)

print("a: ", a, "b: ", b)
print("the r-squared is:", r2)
from income.setup import setup, construct_nn
import theano
import numpy as np
import pymc3 as pm

# Load the training and test splits from the two CSV files.
X_train, Y_train, X_test, Y_test = setup(
    training_data="../data/income/2014.csv",
    test_data="../data/income/2015.csv",
)

# Wrap the training arrays in Theano shared variables so that their contents
# can be replaced with the test data after fitting (see set_value below).
train_features = np.array(X_train)
train_target = np.array(Y_train['avg_total_income'])
ann_input = theano.shared(train_features)
ann_output = theano.shared(train_target)

# construct_nn is project code; its result is used as a context manager below.
neural_network = construct_nn(ann_input, ann_output)

# Fit with ADVI, passing n=3000 to pm.fit.
with neural_network:
    inference = pm.ADVI()
    approx = pm.fit(n=3000, method=inference)

# Swap the test data into the shared variables.
ann_input.set_value(X_test)
ann_output.set_value(Y_test['avg_total_income'])

# Draw 500 samples from the fitted approximation.
trace = approx.sample(draws=500)

# Sample from the model using that trace, now that the shared variables hold
# the test data.
with neural_network:
    ppc = pm.sample_ppc(trace, samples=500, progressbar=True)

# Bare expression: shows the approximation object in an interactive session
# or notebook; it has no effect when the file is run as a script.
approx
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

from income.setup import setup

# Quadratic least-squares fit of Y against the "avg_dep" column returned by
# setup(100).
X, Y = setup(100)

# Design matrix with one row [1, x, x^2] per observation.
X = np.array([[1, x, x * x] for x in X["avg_dep"]])

# Solve the normal equations (X^T X) w = X^T Y for the weights.
w = np.linalg.solve(np.dot(X.T, X), np.dot(X.T, Y))
Yhat = np.dot(X, w)

# Draw the fitted curve from left to right. x and Yhat are reordered with the
# SAME permutation so every prediction stays paired with its own input;
# sorting the two sequences independently would mismatch them whenever the
# fitted quadratic is not monotonically increasing over the data.
order = np.argsort(X[:, 1])
plt.scatter(X[:, 1], Y)
plt.plot(X[order, 1], Yhat[order])
plt.show()

# R^2 = 1 - (residual sum of squares) / (total sum of squares about the mean).
d1 = Y - Yhat
d2 = Y - Y.mean()
r2 = 1 - d1.dot(d1) / d2.dot(d2)
print("the r-squared is:", r2)
# Logistic-model forward pass on two columns of the income data.
#
# Visible steps, in order:
#   * load X, Y with setup(sample_size=100) and keep only the 'agi_stub' and
#     'zipcode' columns of X;
#   * call getScaler(X, columns, scalers) with an initially empty dict
#     (project helper -- whether it modifies X or `scalers` is not visible
#     here; its result `scaler1` is not used in the visible portion);
#   * draw a random weight vector with one entry per column of X, bias b = 0
#     (`z = X.dot(w)` is computed but not used in the visible portion);
#   * sigmoid(z) returns 1 / (1 + exp(-z));
#   * forward(X, W, b) returns sigmoid(X.dot(W) + b);
#   * P_Y_given_X holds the forward output and `predictions` is that output
#     rounded with np.round.
#
# NOTE(review): the statements below are collapsed onto one physical line,
# which is not valid Python; the original line breaks need to be restored.
# NOTE(review): the trailing `def classification_rate(Y, P):` header is cut
# off here -- its body lies beyond the visible text, so the code is left
# untouched.
import numpy as np import matplotlib.pyplot as plt from income.setup import setup from income.util import getScaler X, Y = setup(sample_size=100) columns = ['agi_stub', 'zipcode'] X = X[columns] scalers = {} scaler1 = getScaler(X, columns, scalers) w = np.random.randn(X.shape[1]) z = X.dot(w) b = 0 def sigmoid(z): return 1 / (1 + np.exp(-z)) def forward(X, W, b): return sigmoid(X.dot(W) + b) P_Y_given_X = forward(X, w, b) predictions = np.round(P_Y_given_X) def classification_rate(Y, P):