def _negative_log_partial_j(x: Vector, y: float, beta: Vector, j: int) -> float:
    """
    The j-th partial derivative of the negative log likelihood
    for one data point, taken with respect to beta[j].
    """
    return -(y - logistic(dot(x, beta))) * x[j]
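# A sketch of how the per-coordinate partials above might be assembled into the
# negative_log_gradient used in main() below. This is an assumption about the
# missing helpers, not necessarily the canonical implementation; vector_sum is
# assumed to be the element-wise list sum from the linear_algebra helpers.
def _negative_log_gradient(x: Vector, y: float, beta: Vector) -> Vector:
    """The gradient for one data point: all j partials stacked into a vector."""
    return [_negative_log_partial_j(x, y, beta, j) for j in range(len(beta))]

def negative_log_gradient(xs: List[Vector],
                          ys: List[float],
                          beta: Vector) -> Vector:
    """The gradient of the negative log likelihood over the whole dataset."""
    return vector_sum([_negative_log_gradient(x, y, beta)
                       for x, y in zip(xs, ys)])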
def sqerror_gradients(network: List[List[Vector]],
                      input_vector: Vector,
                      target_vector: Vector) -> List[List[Vector]]:
    """
    Given a neural network, an input vector, and a target vector,
    make a prediction and compute the gradient of the squared error
    loss with respect to the neuron weights.
    """
    # forward pass
    hidden_outputs, outputs = feed_forward(network, input_vector)

    # gradients with respect to output neuron pre-activation outputs
    output_deltas = [output * (1 - output) * (output - target)
                     for output, target in zip(outputs, target_vector)]

    # gradients with respect to output neuron weights
    output_grads = [[output_deltas[i] * hidden_output
                     for hidden_output in hidden_outputs + [1]]
                    for i, output_neuron in enumerate(network[-1])]

    # gradients with respect to hidden neuron pre-activation outputs
    hidden_deltas = [hidden_output * (1 - hidden_output) *
                         dot(output_deltas, [n[i] for n in network[-1]])
                     for i, hidden_output in enumerate(hidden_outputs)]

    # gradients with respect to hidden neuron weights
    hidden_grads = [[hidden_deltas[i] * input for input in input_vector + [1]]
                    for i, hidden_neuron in enumerate(network[0])]

    return [hidden_grads, output_grads]
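# A minimal sketch of how sqerror_gradients might drive one training step,
# assuming gradient_step from the gradient descent helpers; the loop structure
# and default learning rate here are illustrative, not the original training code.
def network_training_step(network: List[List[Vector]],
                          input_vector: Vector,
                          target_vector: Vector,
                          learning_rate: float = 1.0) -> List[List[Vector]]:
    """Take one gradient step on every neuron's weights and return the
    updated network."""
    gradients = sqerror_gradients(network, input_vector, target_vector)
    # update each neuron's weight vector using its matching gradient
    return [[gradient_step(neuron, grad, -learning_rate)
             for neuron, grad in zip(layer, layer_grads)]
            for layer, layer_grads in zip(network, gradients)]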
def sum_of_squares(v: Vector) -> float:
    """Computes the sum of squared elements in v"""
    return dot(v, v)
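# Illustrative check: 1 + 4 + 9 = 14.
assert sum_of_squares([1, 2, 3]) == 14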
def main():
    from matplotlib import pyplot as plt
    plt.close()
    plt.clf()
    plt.gca().clear()

    from datascience.working_data import rescale
    from datascience.multiple_regression import least_squares_fit, predict
    from datascience.gradient_descent import gradient_step

    learning_rate = 0.001
    rescaled_xs = rescale(xs)
    beta = least_squares_fit(rescaled_xs, ys, learning_rate, 1000, 1)
    # [0.26, 0.43, -0.43]
    predictions = [predict(x_i, beta) for x_i in rescaled_xs]

    plt.scatter(predictions, ys)
    plt.xlabel("predicted")
    plt.ylabel("actual")
    # plt.show()
    plt.savefig('im/linear_regression_for_probabilities.png')
    plt.close()

    from datascience.machine_learning import train_test_split
    import random
    import tqdm

    random.seed(0)
    x_train, x_test, y_train, y_test = train_test_split(rescaled_xs, ys, 0.33)

    learning_rate = 0.01

    # pick a random starting point
    beta = [random.random() for _ in range(3)]

    with tqdm.trange(5000) as t:
        for epoch in t:
            gradient = negative_log_gradient(x_train, y_train, beta)
            beta = gradient_step(beta, gradient, -learning_rate)
            loss = negative_log_likelihood(x_train, y_train, beta)
            t.set_description(f"loss: {loss:.3f} beta: {beta}")

    from datascience.working_data import scale

    means, stdevs = scale(xs)
    beta_unscaled = [(beta[0]
                      - beta[1] * means[1] / stdevs[1]
                      - beta[2] * means[2] / stdevs[2]),
                     beta[1] / stdevs[1],
                     beta[2] / stdevs[2]]
    # [8.9, 1.6, -0.000288]

    assert (negative_log_likelihood(xs, ys, beta_unscaled)
            == negative_log_likelihood(rescaled_xs, ys, beta))

    true_positives = false_positives = true_negatives = false_negatives = 0

    for x_i, y_i in zip(x_test, y_test):
        prediction = logistic(dot(beta, x_i))

        if y_i == 1 and prediction >= 0.5:  # TP: paid and we predict paid
            true_positives += 1
        elif y_i == 1:                      # FN: paid and we predict unpaid
            false_negatives += 1
        elif prediction >= 0.5:             # FP: unpaid and we predict paid
            false_positives += 1
        else:                               # TN: unpaid and we predict unpaid
            true_negatives += 1

    precision = true_positives / (true_positives + false_positives)
    recall = true_positives / (true_positives + false_negatives)

    print(precision, recall)

    assert precision == 0.75
    assert recall == 0.8

    plt.clf()
    plt.gca().clear()

    predictions = [logistic(dot(beta, x_i)) for x_i in x_test]
    plt.scatter(predictions, y_test, marker='+')
    plt.xlabel("predicted probability")
    plt.ylabel("actual outcome")
    plt.title("Logistic Regression Predicted vs. Actual")
    # plt.show()
    plt.savefig('im/logistic_regression_predicted_vs_actual.png')
    plt.gca().clear()
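# main() above is defined but never invoked in this extract; a conventional
# entry-point guard (an assumption, not part of the original) would be:
if __name__ == "__main__":
    main()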
def _negative_log_likelihood(x: Vector, y: float, beta: Vector) -> float:
    """The negative log likelihood for one data point"""
    if y == 1:
        return -math.log(logistic(dot(x, beta)))
    else:
        return -math.log(1 - logistic(dot(x, beta)))
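# A sketch of the whole-dataset negative_log_likelihood referenced in main(),
# assumed to simply sum the per-point terms above (since the data points are
# treated as independent, their likelihoods multiply and their logs add).
def negative_log_likelihood(xs: List[Vector],
                            ys: List[float],
                            beta: Vector) -> float:
    return sum(_negative_log_likelihood(x, y, beta) for x, y in zip(xs, ys))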
def predict(x: Vector, beta: Vector) -> float:
    """assumes that the first element of x is 1"""
    return dot(x, beta)
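# A likely companion pair, assuming the usual definition of the per-point
# residual: error is the signed residual and squared_error its square. These
# names mirror the multiple regression helpers but are assumptions here.
def error(x: Vector, y: float, beta: Vector) -> float:
    """the residual: predicted minus actual"""
    return predict(x, beta) - y

def squared_error(x: Vector, y: float, beta: Vector) -> float:
    return error(x, y, beta) ** 2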
def ridge_penalty(beta: Vector, alpha: float) -> float:
    """alpha times the sum of squares of every beta except the constant term"""
    return alpha * dot(beta[1:], beta[1:])
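# A sketch of how ridge_penalty might fold into the per-point loss: the
# ordinary squared error plus the penalty. It relies on the residual helper
# error sketched above; the combination, not the exact name, is the point.
def squared_error_ridge(x: Vector, y: float, beta: Vector, alpha: float) -> float:
    """squared error for one point plus the ridge penalty on beta"""
    return error(x, y, beta) ** 2 + ridge_penalty(beta, alpha)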
from datascience.linear_algebra import add

def ridge_penalty_gradient(beta: Vector, alpha: float) -> Vector:
    """gradient of just the ridge penalty"""
    return [0.] + [2 * alpha * beta_j for beta_j in beta[1:]]

def sqerror_ridge_gradient(x: Vector,
                           y: float,
                           beta: Vector,
                           alpha: float) -> Vector:
    """
    the gradient corresponding to the ith squared error term,
    including the ridge penalty
    """
    return add(sqerror_gradient(x, y, beta),
               ridge_penalty_gradient(beta, alpha))

random.seed(0)
beta_0 = least_squares_fit_ridge(inputs, daily_minutes_good, 0.0,  # alpha
                                 learning_rate, 5000, 25)
assert 5 < dot(beta_0[1:], beta_0[1:]) < 6
assert 0.67 < multiple_r_squared(inputs, daily_minutes_good, beta_0) < 0.69

beta_0_1 = least_squares_fit_ridge(inputs, daily_minutes_good, 0.1,  # alpha
                                   learning_rate, 5000, 25)
# [30.8, 0.95, -1.83, 0.54]
assert 4 < dot(beta_0_1[1:], beta_0_1[1:]) < 5
assert 0.67 < multiple_r_squared(inputs, daily_minutes_good, beta_0_1) < 0.69

beta_1 = least_squares_fit_ridge(inputs, daily_minutes_good, 1,  # alpha
                                 learning_rate, 5000, 25)
# [30.6, 0.90, -1.68, 0.10]
assert 3 < dot(beta_1[1:], beta_1[1:]) < 4
assert 0.67 < multiple_r_squared(inputs, daily_minutes_good, beta_1) < 0.69

beta_10 = least_squares_fit_ridge(inputs, daily_minutes_good, 10,  # alpha
                                  learning_rate, 5000, 25)
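# A hedged sketch of the least_squares_fit_ridge used above, modeled as a
# minibatch gradient-descent fit. vector_mean, gradient_step, and the random
# initialization are assumptions about the surrounding helpers, not the
# canonical implementation.
def least_squares_fit_ridge(xs: List[Vector],
                            ys: List[float],
                            alpha: float,
                            learning_rate: float,
                            num_steps: int,
                            batch_size: int = 1) -> Vector:
    # start with a random guess for beta
    beta = [random.random() for _ in xs[0]]

    for _ in range(num_steps):
        for start in range(0, len(xs), batch_size):
            batch_xs = xs[start:start + batch_size]
            batch_ys = ys[start:start + batch_size]

            # average the ridge-penalized gradient over the minibatch,
            # then take one descent step
            gradient = vector_mean([sqerror_ridge_gradient(x, y, beta, alpha)
                                    for x, y in zip(batch_xs, batch_ys)])
            beta = gradient_step(beta, gradient, -learning_rate)

    return beta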
def transform_vector(v: Vector, components: List[Vector]) -> Vector:
    return [dot(v, w) for w in components]
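# A likely whole-dataset companion: apply transform_vector to every row.
# The name transform is an assumption.
def transform(data: List[Vector], components: List[Vector]) -> List[Vector]:
    return [transform_vector(v, components) for v in data]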
def project(v: Vector, w: Vector) -> Vector:
    """return the projection of v onto the direction w"""
    projection_length = dot(v, w)
    return scalar_multiply(projection_length, w)
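# A sketch of how project might be used to peel off an already-found component
# before searching for the next one (the usual PCA deflation step); subtract is
# assumed to be element-wise vector subtraction from the linear_algebra helpers.
def remove_projection_from_vector(v: Vector, w: Vector) -> Vector:
    """project v onto w and subtract the result from v"""
    return subtract(v, project(v, w))

def remove_projection(data: List[Vector], w: Vector) -> List[Vector]:
    return [remove_projection_from_vector(v, w) for v in data]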
def directional_variance_gradient(data: List[Vector], w: Vector) -> Vector:
    """The gradient of directional variance with respect to w"""
    w_dir = direction(w)
    return [sum(2 * dot(v, w_dir) * v[i] for v in data)
            for i in range(len(w))]
def directional_variance(data: List[Vector], w: Vector) -> float:
    """Returns the variance of the data in the direction of w"""
    w_dir = direction(w)
    return sum(dot(v, w_dir) ** 2 for v in data)
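# A hedged sketch of how the two directional-variance helpers above could be
# combined into a gradient-ascent search for the first principal component.
# gradient_step (from the gradient descent helpers) and the step count / step
# size defaults are assumptions chosen for illustration.
def first_principal_component(data: List[Vector],
                              n: int = 100,
                              step_size: float = 0.1) -> Vector:
    guess = [1.0 for _ in data[0]]        # start from an arbitrary direction
    for _ in range(n):
        gradient = directional_variance_gradient(data, guess)
        guess = gradient_step(guess, gradient, step_size)  # ascend: +step_size
    return direction(guess)               # return a unit vector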
def perceptron_output(weights: Vector, bias: float, x: Vector) -> float:
    """Returns 1 if the perceptron 'fires', 0 if not"""
    calculation = dot(weights, x) + bias
    return step_function(calculation)
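# An illustrative check, assuming step_function returns 1 when its input is
# nonnegative: with weights [2., 2.] and bias -3., the perceptron fires only
# when both inputs are 1, i.e. it computes AND.
and_weights = [2., 2.]
and_bias = -3.
assert perceptron_output(and_weights, and_bias, [1, 1]) == 1
assert perceptron_output(and_weights, and_bias, [0, 1]) == 0
assert perceptron_output(and_weights, and_bias, [1, 0]) == 0
assert perceptron_output(and_weights, and_bias, [0, 0]) == 0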
def neuron_output(weights: Vector, inputs: Vector) -> float:
    # weights includes the bias term, inputs includes a 1
    return sigmoid(dot(weights, inputs))
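# A sketch of the feed_forward that sqerror_gradients above relies on: run the
# input through each layer, appending the constant 1 bias input at every layer
# and collecting every layer's outputs (so a two-layer network unpacks into
# hidden_outputs and outputs). The exact signature is an assumption.
def feed_forward(neural_network: List[List[Vector]],
                 input_vector: Vector) -> List[Vector]:
    """Feeds the input vector through the network and returns
    the outputs of all layers (not just the last one)."""
    outputs: List[Vector] = []

    for layer in neural_network:
        input_with_bias = input_vector + [1]              # add the bias input
        output = [neuron_output(neuron, input_with_bias)  # compute each neuron's
                  for neuron in layer]                    # output for this layer
        outputs.append(output)

        input_vector = output   # this layer's output is the next layer's input

    return outputs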