def neuron_output(weights, inputs):
    a = sigmoid(dot(weights, inputs))
    print("weights :", weights)
    print("inputs  :", inputs)
    print("dot     :", dot(weights, inputs))
    print("sigmoid :", a)
    return a
def _negative_log_likelihood(x: Vector, y: float, weights: Vector) -> float:
    """
    Return the negative log likelihood of one data point.

    The dot product of x and the weights, transformed by the logistic
    function, gives p = P(y = 1 | x).  For y = 1 we return -log(p);
    for y = 0 we return -log(1 - p).
    """
    if y == 1:
        return -math.log(logistic(dot(x, weights)))
    elif y == 0:
        return -math.log(1 - logistic(dot(x, weights)))
    else:
        raise ValueError(f"invalid y value: {y}")
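# A small companion sketch (not in the original snippets): the negative log
# likelihood of a whole dataset is the sum over individual points, using
# _negative_log_likelihood above.  It assumes the Vector alias and a List
# import from typing; the names xs and ys are illustrative.
def negative_log_likelihood(xs: List[Vector], ys: List[float], weights: Vector) -> float:
    return sum(_negative_log_likelihood(x, y, weights)
               for x, y in zip(xs, ys))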
def test_multiply_matrix_matrix_03(self):
    A = [[11, 12, 13],
         [21, 22, 23]]
    B = [[111, 112],
         [121, 122],
         [131, 132]]
    C = la.multiply_matrix_matrix(A, B)

    # transpose B so its columns become rows we can dot against
    BT = list(zip(*B))
    expected = [[la.dot(A[0], BT[0]), la.dot(A[0], BT[1])],
                [la.dot(A[1], BT[0]), la.dot(A[1], BT[1])]]

    self.assertSequenceEqual(C, expected)
def backpropagate(network, input_vector, target):
    hidden_outputs, outputs = feed_forward(network, input_vector)

    # the output * (1 - output) is from the derivative of sigmoid
    output_deltas = [output * (1 - output) * (output - target[i])
                     for i, output in enumerate(outputs)]

    # adjust weights for output layer (network[-1])
    for i, output_neuron in enumerate(network[-1]):
        # focus on the ith output layer neuron
        for j, hidden_output in enumerate(hidden_outputs + [1]):
            # adjust the jth weight based on both
            # this neuron's delta and its jth input
            output_neuron[j] -= output_deltas[i] * hidden_output

    # back-propagate errors to hidden layer
    hidden_deltas = [hidden_output * (1 - hidden_output) *
                     dot(output_deltas, [n[i] for n in network[-1]])
                     for i, hidden_output in enumerate(hidden_outputs)]

    # adjust weights for hidden layer (network[0])
    for i, hidden_neuron in enumerate(network[0]):
        for j, input in enumerate(input_vector + [1]):
            hidden_neuron[j] -= hidden_deltas[i] * input
def logistic_log_likelihood_i(x_i, y_i, beta):
    """Log likelihood of a single training example."""
    f = logistic(dot(x_i, beta))
    if y_i == 1:
        return math.log(f)
    else:
        return math.log(1 - f)
def logistic_log_partial_ij(x_i, y_i, beta, j):
    """Partial derivative of the log likelihood with respect to beta_j.

    Here i is the index of the data point, j the index of the derivative.
    """
    f = logistic(dot(x_i, beta))
    return (y_i - f) * x_i[j]
def test_dot_04(self):
    a = [1.0, 2.0, 3.0]
    b = [0.0, 1.0, 0.0]
    c = la.dot(a, b)
    expected = 2.0
    self.assertAlmostEqual(c, expected)
def sqerror_gradients(network: List[List[Vector]],
                      input_vector: Vector,
                      target_vector: Vector) -> List[List[Vector]]:
    """
    Given a neural network, an input vector, and a target vector,
    make a prediction and compute the gradient of the squared error
    loss with respect to the neuron weights.
    """
    # forward pass
    hidden_outputs, outputs = feed_forward(network, input_vector)

    # gradients with respect to output neuron pre-activation outputs
    output_deltas = [output * (1 - output) * (output - target)
                     for output, target in zip(outputs, target_vector)]

    # gradients with respect to output neuron weights
    output_grads = [[output_deltas[i] * hidden_output
                     for hidden_output in hidden_outputs + [1]]
                    for i, output_neuron in enumerate(network[-1])]

    # gradients with respect to hidden neuron pre-activation outputs
    hidden_deltas = [hidden_output * (1 - hidden_output) *
                     dot(output_deltas, [n[i] for n in network[-1]])
                     for i, hidden_output in enumerate(hidden_outputs)]

    # gradients with respect to hidden neuron weights
    hidden_grads = [[hidden_deltas[i] * input for input in input_vector + [1]]
                    for i, hidden_neuron in enumerate(network[0])]

    return [hidden_grads, output_grads]
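# A minimal sketch (an assumption, not part of the original snippets) of how
# sqerror_gradients could drive one epoch of gradient-descent training.  The
# gradient_step helper and the learning_rate default are illustrative; only
# sqerror_gradients and the List/Vector aliases are taken from the code above.
def gradient_step(v: Vector, gradient: Vector, step_size: float) -> Vector:
    """Move step_size along the gradient direction from v."""
    return [v_i + step_size * g_i for v_i, g_i in zip(v, gradient)]

def train_one_epoch(network: List[List[Vector]],
                    inputs: List[Vector],
                    targets: List[Vector],
                    learning_rate: float = 1.0) -> List[List[Vector]]:
    for input_vector, target_vector in zip(inputs, targets):
        gradients = sqerror_gradients(network, input_vector, target_vector)
        # step *against* the gradient for every neuron in every layer
        network = [[gradient_step(neuron, grad, -learning_rate)
                    for neuron, grad in zip(layer, layer_grads)]
                   for layer, layer_grads in zip(network, gradients)]
    return network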
def _negative_log_partial_j(x: Vector, y: float, beta: Vector, j: int) -> float:
    """The j-th partial derivative of the negative log likelihood
    for one data point."""
    return -(y - logistic(dot(x, beta))) * x[j]
def neuron_output(weights, inputs):
    # debugging output, left disabled:
    # print(weights)
    # print("----------------------------")
    # print(inputs)
    return sigmoid(dot(weights, inputs))
def backpropagate(network, input_vector, targets):
    output_layer = network[-1]
    hidden_outputs, outputs = feed_forward(network, input_vector)

    # the output * (1 - output) is from the derivative of sigmoid
    output_deltas = [output * (1 - output) * (output - target)
                     for output, target in zip(outputs, targets)]

    # adjust weights for output layer, one neuron at a time
    for i, output_neuron in enumerate(network[-1]):
        # focus on the ith output layer neuron
        for j, hidden_output in enumerate(hidden_outputs + [1]):
            # adjust the jth weight based on both
            # this neuron's delta and its jth input
            output_neuron[j] -= output_deltas[i] * hidden_output

    # back-propagate errors to hidden layer
    hidden_deltas = [hidden_output * (1 - hidden_output) *
                     dot(output_deltas, [n[i] for n in output_layer])
                     for i, hidden_output in enumerate(hidden_outputs)]

    # adjust weights for hidden layer, one neuron at a time
    for i, hidden_neuron in enumerate(network[0]):
        for j, input in enumerate(input_vector + [1]):
            hidden_neuron[j] -= hidden_deltas[i] * input
def backpropagate(network, input_vector, target):
    hidden_outputs, outputs = feed_forward(network, input_vector)

    # the output * (1 - output) is from the derivative of sigmoid
    output_deltas = [output * (1 - output) * (output - target[i])
                     for i, output in enumerate(outputs)]

    # adjust weights for output layer (network[-1])
    for i, output_neuron in enumerate(network[-1]):
        for j, hidden_output in enumerate(hidden_outputs + [1]):
            output_neuron[j] -= output_deltas[i] * hidden_output

    # back-propagate errors to hidden layer
    hidden_deltas = [hidden_output * (1 - hidden_output) *
                     dot(output_deltas, [n[i] for n in network[-1]])
                     for i, hidden_output in enumerate(hidden_outputs)]

    # adjust weights for hidden layer (network[0])
    for i, hidden_neuron in enumerate(network[0]):
        for j, input in enumerate(input_vector + [1]):
            hidden_neuron[j] -= hidden_deltas[i] * input
def test_dot_05(self):
    a = [3.0, 4.0]
    b = [-4.0, 3.0]
    c = la.dot(a, b)
    expected = 0.0
    self.assertAlmostEqual(c, expected)
def setup2(self):
    random.seed(1)
    aNeuron = Neuron(n_inputs=2,
                     activation_function=sigmoid_function,
                     weight_init_function=weight_init_function_random,
                     learning_rate_function=learning_rate_function)
    aNeuron.weights = [1.0, 0.5, 0.25]
    inputs = [2.0, 1.5, 1.0]
    output = aNeuron.calc_neurons_output(inputs)
    expected = sigmoid_function(dot(inputs, aNeuron.weights))
    return output, expected
def directional_differenciate(f: Callable[[Vector], float], v: Vector, u: Vector) -> float:
    """
    Return the rate of change of f at v in the direction of the unit vector u.

    Let f(x, y, z, ...) map f: R x ... x R -> R and assume f is differentiable
    (hence continuous).  Then D_u f(x, y, z, ...) = grad(f) . u, where grad(f)
    is the gradient vector of f.
    """
    return dot(gradient_vector(f, v), u)
def forward(self, input: Tensor) -> Tensor:
    # Save the input to use in the backward pass.
    self.input = input

    # Return the vector of neuron outputs.
    return [dot(input, self.w[o]) + self.b[o]
            for o in range(self.output_dim)]
def neuron_output(weights, inputs):
    a = sigmoid(dot(weights, inputs))
    # print("\nneuron_output...")
    # print("weights :", weights)
    # print("inputs  :", inputs)
    # print("dot     :", dot(weights, inputs))
    # print("sigmoid :", a)
    # input()
    return a
def test_dot_01(self):
    a = (1.0, 1.0)
    b = (1.0, 1.0)
    c = la.dot(a, b)
    expected = 2.0
    self.assertSequenceEqual(a, (1.0, 1.0))
    self.assertSequenceEqual(b, (1.0, 1.0))
    self.assertAlmostEqual(c, expected)
def covariance(x, y):
    """
    x and y should have the same length

    :x: first variable (list)
    :y: second variable (list)
    :returns: covariance of x and y (float)
    """
    n = len(x)
    return dot(de_mean(x), de_mean(y)) / (n - 1)
def setup2(self):
    random.seed(1)
    aNeuron = Neuron(n_inputs=2,
                     activation_function=sigmoid_function,
                     weight_init_function=weight_init_function_random,
                     learning_rate_function=learning_rate_function)
    aNeuron.weights = [1.0, 0.5, 0.25]
    inputs = [2.0, 1.5, 1.0]
    output = aNeuron.calc_neurons_output(inputs)
    expected = sigmoid_function(dot(inputs, aNeuron.weights))
    return output, expected
def classification_performance_summary(x_test, y_test, beta):
    true_positives = 0
    false_positives = 0
    true_negatives = 0
    false_negatives = 0

    for x_i, y_i in zip(x_test, y_test):
        predict = round(logistic(dot(beta, x_i)))

        if y_i == 1 and predict == 1:
            true_positives += 1
        elif y_i == 1 and predict == 0:
            false_negatives += 1
        elif y_i == 0 and predict == 1:
            false_positives += 1
        else:
            true_negatives += 1

    return true_positives, false_positives, true_negatives, false_negatives
def make_graph_dot_product_as_vector_projection(plt):
    v = [2, 1]
    w = [math.sqrt(.25), math.sqrt(.75)]
    c = dot(v, w)
    vonw = scalar_multiply(c, w)
    o = [0, 0]

    plt.arrow(0, 0, v[0], v[1],
              width=0.002, head_width=.1, length_includes_head=True)
    plt.annotate("v", v, xytext=[v[0] + 0.1, v[1]])
    plt.arrow(0, 0, w[0], w[1],
              width=0.002, head_width=.1, length_includes_head=True)
    plt.annotate("w", w, xytext=[w[0] - 0.1, w[1]])
    plt.arrow(0, 0, vonw[0], vonw[1], length_includes_head=True)
    plt.annotate(u"(v•w)w", vonw, xytext=[vonw[0] - 0.1, vonw[1] + 0.1])
    plt.arrow(v[0], v[1], vonw[0] - v[0], vonw[1] - v[1],
              linestyle='dotted', length_includes_head=True)
    plt.scatter(*zip(v, w, o), marker='.')
    plt.axis('equal')
    plt.show()
def backpropagate(network, input_vector, target):
    hidden_outputs, outputs = feed_forward(network, input_vector)
    # print("\nhidden_outputs :", hidden_outputs)
    # print("outputs:", outputs)
    # for i, output in enumerate(outputs):
    #     print("target:", target[i])

    # the output * (1 - output) is from the derivative of sigmoid
    output_deltas = [output * (1 - output) * (output - target[i])
                     for i, output in enumerate(outputs)]
    # print("output_deltas :\n", output_deltas)
    # print("network[-1] before :\n", network[-1])

    # adjust weights for output layer (network[-1])
    for i, output_neuron in enumerate(network[-1]):
        for j, hidden_output in enumerate(hidden_outputs + [1]):
            # print("before: \n", output_neuron[j])
            # print(hidden_output * output_deltas[i])
            output_neuron[j] -= output_deltas[i] * hidden_output
            # print("after : \n", output_neuron[j])
    # print("network[-1] after :\n", network[-1])

    # back-propagate errors to hidden layer
    hidden_deltas = [hidden_output * (1 - hidden_output) *
                     dot(output_deltas, [n[i] for n in network[-1]])
                     for i, hidden_output in enumerate(hidden_outputs)]

    # adjust weights for hidden layer (network[0])
    for i, hidden_neuron in enumerate(network[0]):
        for j, input1 in enumerate(input_vector + [1]):
            hidden_neuron[j] -= hidden_deltas[i] * input1
def backpropagate(network, input_vector, target):
    hidden_outputs, outputs = feed_forward(network, input_vector)

    # the output * (1 - output) is from the derivative of sigmoid
    output_deltas = [output * (1 - output) * (output - target[i])
                     for i, output in enumerate(outputs)]

    # adjust weights for output layer (network[-1])
    for i, output_neuron in enumerate(network[-1]):
        for j, hidden_output in enumerate(hidden_outputs + [1]):
            output_neuron[j] -= output_deltas[i] * hidden_output

    # back-propagate errors to hidden layer
    hidden_deltas = [hidden_output * (1 - hidden_output) *
                     dot(output_deltas, [n[i] for n in network[-1]])
                     for i, hidden_output in enumerate(hidden_outputs)]

    # adjust weights for hidden layer (network[0])
    for i, hidden_neuron in enumerate(network[0]):
        for j, input in enumerate(input_vector + [1]):
            hidden_neuron[j] -= hidden_deltas[i] * input
def project(v, w):
    """returns the projection of v onto the direction w"""
    projection_length = dot(v, w)
    return scalar_multiply(projection_length, w)
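# Quick illustrative check (not in the original snippets): projecting v onto a
# *unit* vector w keeps only v's component along w.  Assumes dot and
# scalar_multiply from the surrounding code.
assert project([3.0, 4.0], [1.0, 0.0]) == [3.0, 0.0]  # the y-component is dropped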
def directional_variance_i(x_i, w):
    """the variance of the row x_i in the direction w"""
    return dot(x_i, direction(w)) ** 2
def logistic_log_likelihood_i(x_i, y_i, beta):
    if y_i == 1:
        return math.log(logistic(dot(x_i, beta)))
    else:
        return math.log(1 - logistic(dot(x_i, beta)))
def covariance(x, y):
    n = len(x)
    return dot(de_mean(x), de_mean(y)) / (n - 1)
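# A companion sketch (an assumption, not in the original snippets): Pearson
# correlation built on covariance above.  It assumes a standard_deviation
# function like the one used elsewhere in these snippets.
def correlation(x, y):
    stdev_x = standard_deviation(x)
    stdev_y = standard_deviation(y)
    if stdev_x > 0 and stdev_y > 0:
        return covariance(x, y) / stdev_x / stdev_y
    else:
        return 0  # if there is no variation, correlation is zero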
random.seed(0)  # so that you get the same results as me

bootstrap_betas = bootstrap_statistic(list(zip(x, daily_minutes_good)),
                                      estimate_sample_beta, 100)
bootstrap_standard_errors = [
    standard_deviation([beta[i] for beta in bootstrap_betas])
    for i in range(4)]

print("bootstrap standard errors", bootstrap_standard_errors)
print()

print("p_value(30.63, 1.174)", p_value(30.63, 1.174))
print("p_value(0.972, 0.079)", p_value(0.972, 0.079))
print("p_value(-1.868, 0.131)", p_value(-1.868, 0.131))
print("p_value(0.911, 0.990)", p_value(0.911, 0.990))
print()

print("regularization")

random.seed(0)
for alpha in [0.0, 0.01, 0.1, 1, 10]:
    beta = estimate_beta_ridge(x, daily_minutes_good, alpha=alpha)
    print("alpha", alpha)
    print("beta", beta)
    print("dot(beta[1:],beta[1:])", dot(beta[1:], beta[1:]))
    print("r-squared", multiple_r_squared(x, daily_minutes_good, beta))
    print()
def predict(x_i, beta):
    return dot(x_i, beta)
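# A minimal companion sketch (an assumption, not in the original snippets):
# the prediction error and squared error for one observation, built directly
# on predict above.
def error(x_i, y_i, beta):
    return y_i - predict(x_i, beta)

def squared_error(x_i, y_i, beta):
    return error(x_i, y_i, beta) ** 2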
def matrix_product_entry(A, B, i, j):
    return dot(get_row(A, i), get_column(B, j))
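# A minimal sketch (an assumption, not in the original snippets) of a full
# matrix product built on matrix_product_entry above; it relies on the same
# get_row / get_column helpers and adds only an illustrative shape check.
def matrix_multiply(A, B):
    n1, k1 = len(A), len(A[0])
    n2, k2 = len(B), len(B[0])
    if k1 != n2:
        raise ValueError("incompatible shapes: columns of A must match rows of B")
    return [[matrix_product_entry(A, B, i, j) for j in range(k2)]
            for i in range(n1)]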
def directional_variance_i(x_i, w):
    return dot(x_i, direction(w)) ** 2
def directional_variance_gradient_i(x_i, w):
    projection_length = dot(x_i, direction(w))
    return [2 * projection_length * x_ij for x_ij in x_i]
def logistic_log_partial_ij(x_i, y_i, beta, j):
    """here i is the index of the data point, j the index of the derivative"""
    return (y_i - logistic(dot(x_i, beta))) * x_i[j]
def transform_vector(v, components):
    return [dot(v, w) for w in components]
def project(v, w):
    """return the projection of v onto w"""
    coefficient = dot(v, w)
    return scalar_multiply(coefficient, w)
def directional_variance_gradient_i(x_i, w):
    """the contribution of row x_i to the gradient of the direction-w variance"""
    projection_length = dot(x_i, direction(w))
    return [2 * projection_length * x_ij for x_ij in x_i]
# and maximize using gradient descent
beta_hat = maximize_batch(fn, gradient_fn, beta_0)

print("beta_batch", beta_hat)

beta_0 = [1, 1, 1]
beta_hat = maximize_stochastic(logistic_log_likelihood_i,
                               logistic_log_gradient_i,
                               x_train, y_train, beta_0)

print("beta stochastic", beta_hat)

true_positives = false_positives = true_negatives = false_negatives = 0

for x_i, y_i in zip(x_test, y_test):
    predict = logistic(dot(beta_hat, x_i))

    if y_i == 1 and predict >= 0.5:    # TP: paid and we predict paid
        true_positives += 1
    elif y_i == 1:                     # FN: paid and we predict unpaid
        false_negatives += 1
    elif predict >= 0.5:               # FP: unpaid and we predict paid
        false_positives += 1
    else:                              # TN: unpaid and we predict unpaid
        true_negatives += 1

precision = true_positives / (true_positives + false_positives)
recall = true_positives / (true_positives + false_negatives)

print("precision", precision)
print("recall", recall)
random.seed(0)  # so that you get the same results as me

bootstrap_betas = bootstrap_statistic(list(zip(x, daily_minutes_good)),
                                      estimate_sample_beta, 100)
bootstrap_standard_errors = [
    standard_deviation([beta[i] for beta in bootstrap_betas])
    for i in range(4)
]

print("bootstrap standard errors", bootstrap_standard_errors)
print()

print("p_value(30.63, 1.174)", p_value(30.63, 1.174))
print("p_value(0.972, 0.079)", p_value(0.972, 0.079))
print("p_value(-1.868, 0.131)", p_value(-1.868, 0.131))
print("p_value(0.911, 0.990)", p_value(0.911, 0.990))
print()

print("regularization")

random.seed(0)
for alpha in [0.0, 0.01, 0.1, 1, 10]:
    beta = estimate_beta_ridge(x, daily_minutes_good, alpha=alpha)
    print("alpha", alpha)
    print("beta", beta)
    print("dot(beta[1:],beta[1:])", dot(beta[1:], beta[1:]))
    print("r-squared", multiple_r_squared(x, daily_minutes_good, beta))
    print()
def covariance(x, y):
    n = len(x)
    return dot(de_mean(x), de_mean(y)) / (n - 1)
def ridge_penalty(beta, alpha):
    return alpha * dot(beta[1:], beta[1:])
def neuron_output(weights, inputs):
    return sigmoid(dot(weights, inputs))
def _negative_log_partial_derivative(x: Vector, y: float, beta: Vector, j: int) -> float:
    """Calculate the jth partial derivative for one row"""
    return -(y - logistic(dot(x, beta))) * x[j]
def perceptron_output(weights, bias, x):
    """returns 1 if the perceptron 'fires', 0 if not"""
    return step_function(dot(weights, x) + bias)
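# Illustrative usage of perceptron_output (the weights and bias here are an
# assumption, not taken from the original snippets, and step_function is
# assumed to fire on non-negative input): with weights [2, 2] and bias -3,
# the perceptron computes a logical AND of two 0/1 inputs.
and_weights = [2.0, 2.0]
and_bias = -3.0
assert perceptron_output(and_weights, and_bias, [1, 1]) == 1
assert perceptron_output(and_weights, and_bias, [0, 1]) == 0
assert perceptron_output(and_weights, and_bias, [1, 0]) == 0
assert perceptron_output(and_weights, and_bias, [0, 0]) == 0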
# and maximize using gradient descent
beta_hat = maximize_batch(fn, gradient_fn, beta_0)

print("beta_batch", beta_hat)

beta_0 = [1, 1, 1]
beta_hat = maximize_stochastic(logistic_log_likelihood_i,
                               logistic_log_gradient_i,
                               x_train, y_train, beta_0)

print("beta stochastic", beta_hat)

true_positives = false_positives = true_negatives = false_negatives = 0

for x_i, y_i in zip(x_test, y_test):
    predict = logistic(dot(beta_hat, x_i))

    if y_i == 1 and predict >= 0.5:    # TP: paid and we predict paid
        true_positives += 1
    elif y_i == 1:                     # FN: paid and we predict unpaid
        false_negatives += 1
    elif predict >= 0.5:               # FP: unpaid and we predict paid
        false_positives += 1
    else:                              # TN: unpaid and we predict unpaid
        true_negatives += 1

precision = true_positives / (true_positives + false_positives)
recall = true_positives / (true_positives + false_negatives)

print("precision", precision)
print("recall", recall)
def project(v, w):
    projection_length = dot(v, w)
    return scalar_multiply(projection_length, w)
def cosine_similarity(v, w):
    return dot(v, w) / math.sqrt(dot(v, v) * dot(w, w))
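# Small illustrative checks (not in the original snippets): parallel vectors
# have cosine similarity 1, orthogonal vectors 0, and opposite vectors -1.
assert abs(cosine_similarity([1.0, 0.0], [2.0, 0.0]) - 1.0) < 1e-9
assert abs(cosine_similarity([1.0, 0.0], [0.0, 3.0]) - 0.0) < 1e-9
assert abs(cosine_similarity([1.0, 2.0], [-1.0, -2.0]) + 1.0) < 1e-9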
# and maximize using gradient descent
beta_hat = maximize_batch(fn, gradient_fn, beta_0)

print("beta_batch", beta_hat)

beta_0 = [1, 1, 1]
beta_hat = maximize_stochastic(logistic_log_likelihood_i,
                               logistic_log_gradient_i,
                               x_train, y_train, beta_0)

print("beta stochastic", beta_hat)

true_positives = false_positives = true_negatives = false_negatives = 0

for x_i, y_i in zip(x_test, y_test):
    predict = logistic(dot(beta_hat, x_i))

    if y_i == 1 and predict >= 0.5:    # TP: paid and we predict paid
        true_positives += 1
    elif y_i == 1:                     # FN: paid and we predict unpaid
        false_negatives += 1
    elif predict >= 0.5:               # FP: unpaid and we predict paid
        false_positives += 1
    else:                              # TN: unpaid and we predict unpaid
        true_negatives += 1

precision = true_positives / (true_positives + false_positives)
recall = true_positives / (true_positives + false_negatives)

x1 = [1, 56000]
x2 = [3.7, 60000]
random.seed(0)  # so that you get the same results as me

bootstrap_betas = bootstrap_statistic(list(zip(x, daily_minutes_good)),
                                      estimate_sample_beta, 100)
bootstrap_standard_errors = [
    standard_deviation([beta[i] for beta in bootstrap_betas])
    for i in range(4)]

print("bootstrap standard errors", bootstrap_standard_errors)
print()

print("p_value(30.63, 1.174)", p_value(30.63, 1.174))
print("p_value(0.972, 0.079)", p_value(0.972, 0.079))
print("p_value(-1.868, 0.131)", p_value(-1.868, 0.131))
print("p_value(0.911, 0.990)", p_value(0.911, 0.990))
print()

print("regularization")

random.seed(0)
for alpha in [0.0, 0.01, 0.1, 1, 10]:
    beta = estimate_beta_ridge(x, daily_minutes_good, alpha=alpha)
    print("alpha", alpha)
    print("beta", beta)
    print("dot(beta[1:],beta[1:])", dot(beta[1:], beta[1:]))
    print("r-squared", multiple_r_squared(x, daily_minutes_good, beta))
    print()
x_train, x_test, y_train, y_test = train_test_split(rescaled_x, y, 0.33)

# want to maximize log likelihood on the training data
fn = partial(logistic_log_likelihood, x_train, y_train)
gradient_fn = partial(logistic_log_gradient, x_train, y_train)

beta_0 = [random.random() for _ in range(3)]
beta_hat = maximize_batch(fn, gradient_fn, beta_0)

print(beta_hat)

true_positive = false_positive = true_negative = false_negative = 0

for x_i, y_i in zip(x_test, y_test):
    predict = logistic(dot(beta_hat, x_i))

    if y_i == 1 and predict >= 0.5:
        true_positive += 1
    elif y_i == 1:
        false_negative += 1
    elif predict >= 0.5:
        false_positive += 1
    else:
        true_negative += 1

precision = true_positive / (true_positive + false_positive)
recall = true_positive / (true_positive + false_negative)

print(precision)
print(recall)