def predict(theta1, theta2, input_layer):
    layer2 = sigmoid.sigmoid(theta1.dot(input_layer))                  # (25, 401) * (401, 5000) = (25, 5000)
    layer2_new = np.r_[np.ones((1, input_layer.shape[1])), layer2]     # (26, 5000)
    output_layer = sigmoid.sigmoid(theta2.dot(layer2_new))             # (10, 26) * (26, 5000) = (10, 5000)
    return np.argmax(output_layer, axis=0) + 1
def predict(t1, t2, X):
    row, _ = X.shape
    X = np.concatenate([np.ones((row, 1)), X], axis=1)
    a1 = sigmoid(X @ t1.T)
    pad = np.ones((row, 1))
    a1 = np.concatenate((pad, a1), axis=1)
    a2 = sigmoid(a1 @ t2.T)
    p = np.argmax(a2, axis=1)
    return p
def compute3(x, y, theta):
    m = y.size
    value = x.dot(theta)
    v = 1.0
    for idx, item in enumerate(y):
        if item == 1:
            v *= sigmoid.sigmoid(value[idx, 0])
        else:
            v *= (1 - sigmoid.sigmoid(value[idx, 0]))
    ret = -np.log(v) / m
    if np.isnan(ret):
        return np.inf
    return ret
def negSamplingCostAndGradient(predicted, target, outputVectors, dataset, K=10):
    """ Negative sampling cost function for word2vec models

    Implement the cost and gradients for one predicted word vector and one
    target word vector as a building block for word2vec models, using the
    negative sampling technique. K is the sample size.

    Note: See test_word2vec below for dataset's initialization.

    Arguments/Return Specifications: same as softmaxCostAndGradient
    """

    # Sampling of indices is done for you. Do not modify this if you
    # wish to match the autograder and receive points!
    indices = [target]
    indices.extend(getNegativeSamples(target, dataset, K))

    # cost from the target sample
    uo = outputVectors[target, ].reshape((1, -1))   # (1, d)
    vc = predicted.reshape((-1, 1))                 # (d, 1)
    uovc = np.dot(uo, vc)
    s_uovc = sigmoid(uovc)
    cost_from_ts = -1 * np.log(s_uovc)

    # cost from the negative samples
    uk = outputVectors[indices[1:], ]               # (K, d)
    ukvc = np.dot(uk, vc)                           # (K, 1)
    s_ukvc = sigmoid(-1 * ukvc)                     # (K, 1)
    cost_from_ns = -1 * np.sum(np.log(s_ukvc))

    cost = cost_from_ts + cost_from_ns

    # gradients of the predicted vector and the output vectors
    grad = np.zeros_like(outputVectors)
    gradPred = ((s_uovc - 1) * outputVectors[target, ] -
                np.dot((s_ukvc.T - 1), uk)).reshape((-1, ))
    grad_k = -1 * np.dot((s_ukvc - 1.0), predicted.reshape((1, -1)))
    grad_target = (s_uovc - 1) * predicted
    for i, k in enumerate(indices[1:]):
        grad[k] += grad_k[i]
    grad[target] = grad_target

    return cost, gradPred, grad
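# Added reference note (not part of the original source): the quantities above
# implement the skip-gram negative-sampling objective
#
#     J = -log(sigmoid(u_o . v_c)) - sum_{k=1..K} log(sigmoid(-u_k . v_c))
#
# where v_c is the predicted (center) vector, u_o the target output vector,
# and u_k the K sampled negative output vectors. The gradients assembled in
# gradPred and grad follow from
#
#     dJ/dv_c = (sigmoid(u_o . v_c) - 1) * u_o - sum_k (sigmoid(-u_k . v_c) - 1) * u_k
#     dJ/du_o = (sigmoid(u_o . v_c) - 1) * v_c
#     dJ/du_k = -(sigmoid(-u_k . v_c) - 1) * v_c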
def get_crown_burn(self, FGPathway_object, loc, weather_today, sppr_dec):
    """Determines whether the overstory trees burn in a given fire

    RETURNS
    -------
    boolean, True indicates that this cell has a burned crown, False not.
    """
    # in the default model, ladder fuels range from 0 to 1.3 or so
    ladder_fuels = FGPathway_object.get_ladder_fuel(loc)

    # fwi varies from 0 to over 100, and is in units of meters/minute
    fwi = weather_today["FWI"]

    # forming an index for crown-fire risk, based on the ladder fuel load and
    # weather conditions
    severe_fwi = 20  # meters per minute... seems like 20 is getting pretty fast??
    adjusted_fwi = sigmoid(min(fwi / severe_fwi, severe_fwi),
                           center=severe_fwi, min_val=0.0, max_val=2.0)

    crowning_danger_index = adjusted_fwi + ladder_fuels

    # the cut-off for crown fire
    flash_point = 1.5

    if crowning_danger_index > flash_point:
        return True
    else:
        return False
def gradient_reg(theta, reg, *args):
    y = args[1]
    X = args[0]
    m = y.size
    h = sigmoid.sigmoid(X.dot(theta.reshape(-1, 1)))
    grad = (1 / m) * X.T.dot(h - y) + (reg / m) * np.r_[[[0]], theta[1:].reshape(-1, 1)]
    return grad.flatten()
def cost_function_reg(theta, reg, *args):
    y = args[1]
    X = args[0]
    m = y.size
    h = sigmoid.sigmoid(X.dot(theta))
    J = -1 * (1 / m) * (np.log(h + epsilon).T.dot(y) +
                        np.log(1 - h + epsilon).T.dot(1 - y)) + \
        (reg / (2 * m)) * np.sum(np.square(theta[1:]))
    return J[0]
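# Added reference note (not part of the original source): the regularized
# logistic-regression cost computed above is
#
#     J(theta) = -(1/m) * sum_i [ y_i*log(h_i) + (1 - y_i)*log(1 - h_i) ]
#                + (reg/(2m)) * sum_{j>=1} theta_j^2
#
# with h_i = sigmoid(x_i . theta). The bias term theta_0 is excluded from the
# penalty, and epsilon (assumed to be a small module-level constant) guards
# against taking log(0).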
def predict(theta, X):
    m = X.shape[0]
    p = np.zeros(m)
    p = np.round(sigmoid(X.dot(theta)))
    return p
def ladder_fuel_function_PIPO(self, stand_age, years_since_fire):
    """The ladder fuel value in a stand given the time since a fire.

    In Pinus ponderosa stands, the historical fire regime which maintained old
    trees had return intervals from 1-30 years. With fire exclusion, the
    understory will begin to fill with shade-tolerant species. They should
    start representing a threat of crown fire sometime after 30 years or so,
    to simulate this historical dynamic.
    """
    # lodgepole-style build-up pattern:
    # change parameter 'center' to adjust where fuel hits its halfway point
    lodgepole_style_fuels = sigmoid(x=min(years_since_fire, stand_age),
                                    center=30, min_val=0.0, max_val=1.3)

    # In lodgepole, pretty much any fire is stand-replacing. In ponderosa, after about age 20,
    # stands are very resilient to fire. Surface fires then act to reduce ladder fuels and
    # maintain the overstory.
    # So here, instead of stand age, the years_since_fire value is used. As long as fires of
    # some kind are happening every 30 years or so, the ladder fuel value will never rise very
    # high (in the case above, with center=30, max=1.3, at thirty years, the fuel_loading will
    # be 0.65).
    return lodgepole_style_fuels
def gradient(theta, X, y, _lambda):
    m, n = X.shape
    from utils.sigmoid import sigmoid

    h = sigmoid(np.dot(X, theta))
    t = h.T - y
    tmp = np.dot(t, X)
    # Note: although _lambda is accepted, no regularization term is added here;
    # this returns the plain (unregularized) gradient.
    grad_reg_without_reg = tmp / m
    return grad_reg_without_reg.flatten()
def predict_tweet(tweet, freqs, theta):
    # extract the features of the tweet and store them in x
    x = extract_features.extract_features(tweet, freqs)

    # make the prediction using x and theta
    y_pred = sigmoid.sigmoid(np.dot(x, theta))

    return y_pred
def predict(Theta1, Theta2, X):
    # PREDICT Predict the label of an input given a trained neural network
    #   p = PREDICT(Theta1, Theta2, X) outputs the predicted label of X given the
    #   trained weights of a neural network (Theta1, Theta2)

    # Useful values
    if X.ndim == 1:
        X = np.reshape(X, (-1, X.shape[0]))

    m = np.shape(X)[0]
    num_labels = np.shape(Theta2)[0]

    # You need to return the following variables correctly
    X = np.column_stack((np.ones((m, 1)), X))
    a2 = sigmoid(np.dot(X, Theta1.T))
    a2 = np.column_stack((np.ones((m, 1)), a2))
    a3 = sigmoid(np.dot(a2, Theta2.T))
    p = a3.argmax(axis=1) + 1

    return p
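# Added usage sketch (not part of the original source; the file names and layer
# sizes below are assumptions in the style of the Coursera ex3 exercise):
#
#     import numpy as np
#     import scipy.io
#
#     weights = scipy.io.loadmat('ex3weights.mat')            # hypothetical file
#     Theta1, Theta2 = weights['Theta1'], weights['Theta2']   # (25, 401), (10, 26)
#     data = scipy.io.loadmat('ex3data1.mat')                 # hypothetical file
#     X, y = data['X'], data['y'].ravel()                     # (5000, 400), labels 1..10
#
#     p = predict(Theta1, Theta2, X)
#     print('Training accuracy: {:.1f}%'.format(np.mean(p == y) * 100))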
def predictOneVsAll(all_theta, X):
    num_labels, _ = all_theta.shape
    X = np.insert(X, 0, 1, axis=1)

    from utils.sigmoid import sigmoid
    max_value = sigmoid(np.dot(X, all_theta.T))
    p = max_value.argmax(axis=1)
    return p
def gradient_descent(x, y, alpha, iteration):
    m = y.size
    theta = np.zeros(3).reshape(3, -1)
    j_list = []
    for i in range(iteration):
        v = computer_cost.compute2(x, y, theta)
        j_list.append(v)
        values = (sigmoid.sigmoid(x.dot(theta)) - y).T.dot(x).T * (alpha / m)
        theta = theta - values
    return theta, np.array(j_list)
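# Added reference note (not part of the original source): each iteration above
# applies the batch gradient-descent update for logistic regression,
#
#     theta := theta - (alpha/m) * x.T.dot(sigmoid(x.dot(theta)) - y)
#
# which is what the transposed expression in the loop evaluates to; compute2
# (defined elsewhere in this collection) supplies the cost values stored in
# j_list.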
def predictOneVsAll(all_theta, X):
    row, col = X.shape
    X_0 = np.ones((row, 1))
    X = np.concatenate((X_0, X), axis=1)
    hypothesis = sigmoid(X @ all_theta.T)
    p = np.argmax(hypothesis, axis=1)
    return p
def costFunction(theta, X, y):
    # Initialize useful parameters
    m = len(y)
    J = 0
    grad = np.zeros(theta.shape)

    hypothesis = sigmoid(X.dot(theta))
    J = (1 / m) * np.sum(-y.dot(np.log(hypothesis)) - (1 - y).dot(np.log(1 - hypothesis)))
    grad = (1 / m) * (hypothesis - y).dot(X)

    return (J, grad)
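# Added usage sketch (not part of the original source): because costFunction
# returns both the cost and its gradient, it can be passed to
# scipy.optimize.minimize with jac=True; the setup below is illustrative and
# assumes X already includes a leading column of ones.
#
#     import numpy as np
#     from scipy import optimize
#
#     initial_theta = np.zeros(X.shape[1])
#     res = optimize.minimize(costFunction, initial_theta, args=(X, y),
#                             jac=True, method='TNC', options={'maxiter': 400})
#     theta, cost = res.x, res.fun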
def lrCostFunction(theta, X, y, l):
    m = len(y)
    hypothesis = sigmoid(X @ theta)
    reg_parameter = (l / (2 * m)) * np.sum(np.power(theta[1:], 2))
    J = (1 / m) * np.sum((-(y.T) @ np.log(hypothesis)) -
                         (1 - y.T) @ (np.log(1 - hypothesis))) + reg_parameter
    grad = (1 / m) * X.T @ (hypothesis - y)
    grad_parameter = theta[1:] * (l / m)
    grad[1:] += grad_parameter
    return J, grad
def costFunctionReg(theta, X, y, lamda=1):
    # Initialize useful parameters
    m = len(y)
    J = 0
    grad = np.zeros(theta.shape)

    hypothesis = sigmoid(X.dot(theta))
    regularization_parameter = (lamda / (2 * m)) * np.sum(np.power(theta[1:theta.shape[0]], 2))
    J = (1 / m) * np.sum(-y.dot(np.log(hypothesis)) -
                         (1 - y).dot(np.log(1 - hypothesis))) + regularization_parameter
    grad = (1 / m) * (hypothesis - y).dot(X)
    grad_parameter = theta[1:grad.shape[0]] * lamda / m
    grad[1:grad.shape[0]] = grad[1:grad.shape[0]] + grad_parameter
    return (J, grad)
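# Added reference note (not part of the original source): the gradient returned
# above is
#
#     grad_0 = (1/m) * sum_i (h_i - y_i) * x_i0
#     grad_j = (1/m) * sum_i (h_i - y_i) * x_ij + (lamda/m) * theta_j    for j >= 1
#
# i.e. the regularization term is added to every component except the bias.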
def main():
    x = np.linspace(-8, 8, 1000)
    y = sigmoid.sigmoid(x)
    plot_data.plot(x, y, 'x', 'y', {
        'fmt': 'b-',
        'title': 'sigmoid',
        'label': 'sigmoid',
        'show': False
    })

    y2 = []
    for _ in x:
        y2.append(0.5)
    y2 = np.array(y2)
    plot_data.plot(x, y2, 'x', 'y', {
        'fmt': 'g-',
        'label': 'y = 0.5',
        'show': True
    })
def test_center():
    # function signature:
    # sigmoid(x, center=0.0, min_val=0.0, max_val=1.0)
    test_count = 1000
    sample_count = 1
    for i in range(test_count):
        a = random.uniform(-1000, 1000)
        b = random.uniform(-1000, 1000)
        while a == b:
            b = random.uniform(-1000, 1000)
        max_val = max(a, b)
        min_val = min(a, b)
        center = random.uniform(-1000, 1000)
        mid_val = (max_val - min_val) / 2.0 + min_val

        for j in range(sample_count):
            center_val = sigmoid.sigmoid(x=center, center=center,
                                         min_val=min_val, max_val=max_val)
            assert np.allclose(center_val, mid_val)
def _forward_propagate(self, X, w, b):
    """
    Computes h = sigmoid(w*x + b)

    Parameters
    ----------
    X : ndarray, shape (m_samples, n_features)
        Training data.
    w : ndarray, shape (n_features, 1)
        Coefficient vector.
    b : float
        Intercept (bias) term.

    Returns
    -------
    h : ndarray, shape (m_samples, 1)
        Activation values (hypothesis) (probability to be class 1)
    """
    z = np.dot(X, w) + b
    h = sigmoid(z)
    return h
def test_min_max():
    # function signature:
    # sigmoid(x, center=0.0, min_val=0.0, max_val=1.0)
    test_count = 1000
    sample_count = 1000
    for i in range(test_count):
        a = random.uniform(-1000, 1000)
        b = random.uniform(-1000, 1000)
        while a == b:
            b = random.uniform(-1000, 1000)
        max_val = max(a, b)
        min_val = min(a, b)
        center = random.uniform(-1000, 1000)

        samples = [sigmoid.sigmoid(random.uniform(-10000, 10000), center, min_val, max_val)
                   for j in range(sample_count)]

        sample_range = max_val - min_val
        assert (min(samples) - min_val) < (0.05 * sample_range)
        assert (max_val - max(samples)) < (0.05 * sample_range)
def main():
    fig, axes = plt.subplots(1, 3, sharey=True, figsize=(17, 5))
    data = load_data.load('data2.txt', dtype=np.float128)
    X = data[:, 0:2]
    X_map = feature_map.map(X)
    y = data[:, 2].reshape(-1, 1)
    initial_theta = np.zeros(X_map.shape[1])

    # C = 0
    # res = minimize(cost_function_reg, initial_theta, args=(C, X_map, y),
    #                method=None, jac=gradient_reg, options={'maxiter': 3000})
    # print(res)

    for i, C in enumerate([0, 1, 100]):
        # Optimize costFunctionReg
        res2 = minimize(cost_function_reg, initial_theta, args=(C, X_map, y),
                        method=None, jac=gradient_reg, options={'maxiter': 3000})

        accuracy = 100 * sum(predict(res2.x, X_map) == y.ravel()) / y.size

        plotData(data, 'Microchip Test 1', 'Microchip Test 2', 'y = 1', 'y = 0',
                 axes.flatten()[i])

        # Plot decision boundary
        x1_min, x1_max = X[:, 0].min(), X[:, 0].max()
        x2_min, x2_max = X[:, 1].min(), X[:, 1].max()
        xx1, xx2 = np.meshgrid(np.linspace(x1_min, x1_max),
                               np.linspace(x2_min, x2_max))
        h = sigmoid.sigmoid(
            feature_map.map(np.c_[xx1.ravel(), xx2.ravel()]).dot(res2.x.reshape(-1, 1)))
        h = h.reshape(xx1.shape)
        axes.flatten()[i].contour(xx1, xx2, h, [0.5], linewidths=1, colors='g')
        axes.flatten()[i].set_title(
            'Train accuracy {}% with Lambda = {}'.format(np.round(accuracy, decimals=2), C))

    plt.show()
def predict(score1, score2, theta):
    return sigmoid.sigmoid(np.array([1, score1, score2]).dot(theta.reshape(3, -1)).sum())
def nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, _lambda):
    # NNCOSTFUNCTION Implements the neural network cost function for a two layer
    # neural network which performs classification
    #   [J grad] = NNCOSTFUNCTON(nn_params, hidden_layer_size, num_labels, X, y, lambda)
    #   computes the cost and gradient of the neural network. The parameters for the
    #   neural network are "unrolled" into the vector nn_params and need to be
    #   converted back into the weight matrices.
    #
    #   The returned parameter grad should be an "unrolled" vector of the
    #   partial derivatives of the neural network.

    # Reshape nn_params back into the parameters Theta1 and Theta2, the weight
    # matrices for our 2-layer neural network
    Theta1 = np.reshape(nn_params[:hidden_layer_size * (input_layer_size + 1)],
                        (hidden_layer_size, input_layer_size + 1), order='F')
    Theta2 = np.reshape(nn_params[hidden_layer_size * (input_layer_size + 1):],
                        (num_labels, hidden_layer_size + 1), order='F')

    # Setup some useful variables
    m = len(X)

    # You need to return the following variables correctly
    J = 0
    Theta1_grad = np.zeros(Theta1.shape)
    Theta2_grad = np.zeros(Theta2.shape)

    # ====================== YOUR CODE HERE ======================
    # Instructions: You should complete the code by working through the
    #               following parts.
    #
    # Part 1: Feedforward the neural network and return the cost in the
    #         variable J. After implementing Part 1, you can verify that your
    #         cost function computation is correct by verifying the cost
    #         computed in ex4.m
    #
    # Part 2: Implement the backpropagation algorithm to compute the gradients
    #         Theta1_grad and Theta2_grad. You should return the partial
    #         derivatives of the cost function with respect to Theta1 and Theta2
    #         in Theta1_grad and Theta2_grad, respectively. After implementing
    #         Part 2, you can check that your implementation is correct by
    #         running checkNNGradients.
    #
    #         Note: The vector y passed into the function is a vector of labels
    #               containing values from 1..K. You need to map this vector into
    #               a binary vector of 1's and 0's to be used with the neural
    #               network cost function.
    #
    #         Hint: We recommend implementing backpropagation using a for-loop
    #               over the training examples if you are implementing it for
    #               the first time.
    #
    # Part 3: Implement regularization with the cost function and gradients.
    #
    #         Hint: You can implement this around the code for backpropagation.
    #               That is, you can compute the gradients for the regularization
    #               separately and then add them to Theta1_grad and Theta2_grad
    #               from Part 2.

    # add bias 1
    X = np.column_stack((np.ones((m, 1)), X))
    # a2 = sigmoid(X . Theta1.T)
    a2 = sigmoid(np.dot(X, Theta1.T))
    # add bias to a2
    a2 = np.column_stack((np.ones((a2.shape[0], 1)), a2))
    a3 = sigmoid(np.dot(a2, Theta2.T))

    # Recall that whereas the original labels (in the variable y) were 1, 2, ..., 10,
    # for the purpose of training a neural network we need to recode the labels as
    # vectors containing only values 0 or 1. For example, if x(i) is an image of the
    # digit 5, then the corresponding y(i) should be a 10-dimensional vector with
    # y5 = 1 and the other elements equal to 0.
    labels = y
    y = np.zeros((m, num_labels))
    for i in range(m):
        y[i, labels[i] - 1] = 1

    # You should implement the feedforward computation that computes h_theta(x(i)) for
    # every example i and sum the cost over all examples. Your code should also work for
    # a dataset of any size, with any number of labels (you can assume that there are
    # always at least K >= 3 labels).
    cost = 0
    for i in range(m):
        cost += np.sum(y[i] * np.log(a3[i]) + (1 - y[i]) * np.log(1 - a3[i]))
    J = -(1.0 / m) * cost

    theta1_square = np.sum(np.square(Theta1[:, 1:]))
    theta2_square = np.sum(np.square(Theta2[:, 1:]))
    J = J + _lambda * (theta1_square + theta2_square) / 2 / m

    from ex4_NN_back_propagation.sigmoidGradient import sigmoidGradient

    delta_l_1 = 0
    delta_l_2 = 0
    for i in range(m):
        # 1. Set the input layer's values (a(1)) to the t-th training example x(t).
        #    Perform a feedforward pass (Figure 2), computing the activations
        #    (z(2), a(2), z(3), a(3)) for layers 2 and 3. Note that you need to add a
        #    +1 term to ensure that the vectors of activations for layers a(1) and a(2)
        #    also include the bias unit. In Octave, if a1 is a column vector, adding one
        #    corresponds to a1 = [1; a1].
        a1 = X[i]
        z2 = np.dot(a1, Theta1.T)
        a2 = sigmoid(z2)
        a2 = np.concatenate((np.ones(1), a2))
        z3 = np.dot(a2, Theta2.T)
        a3 = sigmoid(z3)

        # 2. For each output unit k in layer 3 (the output layer), set
        #    delta(3)_k = a(3)_k - y_k, where y_k in {0, 1} indicates whether the current
        #    training example belongs to class k (y_k = 1) or to a different class
        #    (y_k = 0). You may find logical arrays helpful for this task (explained in
        #    the previous programming exercise).
        delta3 = np.zeros(num_labels)
        for l in range(num_labels):
            delta3[l] = a3[l] - y[i, l]

        # 3. For the hidden layer l = 2, set
        #    delta(2) = Theta2(:, 2:end).T * delta(3) .* sigmoidGradient(z(2))
        delta2 = np.dot(Theta2[:, 1:].T, delta3) * sigmoidGradient(z2)

        # 4. Accumulate the gradient from this example using the formula
        #    Delta(l) = Delta(l) + delta(l+1) * (a(l)).T
        #    Note that you should skip or remove delta(2)_0. In Octave, removing it
        #    corresponds to delta_2 = delta_2(2:end).
        delta_l_1 += np.outer(delta2, a1.T)
        delta_l_2 += np.outer(delta3, a2.T)

    # 5. Obtain the (unregularized) gradient for the neural network cost function by
    #    multiplying the accumulated gradients by 1/m
    Theta1_grad = delta_l_1 / m
    Theta2_grad = delta_l_2 / m

    # After you have successfully implemented the backpropagation algorithm, you will add
    # regularization to the gradient. To account for regularization, it turns out that you
    # can add this as an additional term after computing the gradients using
    # backpropagation. Specifically, after you have computed Delta(l)_ij using
    # backpropagation, you should add regularization as follows (leaving the bias column
    # unregularized).
    Theta1_grad_unreg = np.copy(Theta1_grad)
    Theta2_grad_unreg = np.copy(Theta2_grad)
    Theta1_grad += _lambda / m * Theta1
    Theta2_grad += _lambda / m * Theta2
    Theta1_grad[:, 0] = Theta1_grad_unreg[:, 0]
    Theta2_grad[:, 0] = Theta2_grad_unreg[:, 0]

    # Unroll gradients
    grad = np.concatenate((Theta1_grad.reshape(np.size(Theta1_grad), order='F'),
                           Theta2_grad.reshape(Theta2_grad.size, order='F')))

    return J, grad
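# Added sketch (not part of the original source): a small central-difference
# helper for sanity-checking the backpropagation gradients above, in the spirit
# of the checkNNGradients step mentioned in the scaffolding comments. The name
# numerical_gradient is hypothetical.
import numpy as np

def numerical_gradient(cost_fn, params, eps=1e-4):
    """Central-difference estimate of d cost_fn / d params (params is 1-D)."""
    grad = np.zeros_like(params, dtype=float)
    for i in range(params.size):
        step = np.zeros_like(params, dtype=float)
        step[i] = eps
        grad[i] = (cost_fn(params + step) - cost_fn(params - step)) / (2 * eps)
    return grad

# Illustrative comparison against the analytic gradient:
#     cost_only = lambda p: nnCostFunction(p, input_layer_size, hidden_layer_size,
#                                          num_labels, X, y, _lambda)[0]
#     J, grad = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
#                              num_labels, X, y, _lambda)
#     np.allclose(numerical_gradient(cost_only, nn_params), grad)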
def costFunction(theta, X, y, _lambda):
    m, n = X.shape
    from utils.sigmoid import sigmoid

    item1 = y * (np.log(sigmoid(np.dot(X, theta))))
    item2 = (1 - y) * (np.log(1 - sigmoid(np.dot(X, theta))))
    return np.sum(-item1 - item2) / m
def gradient(theta, X, y):
    m = y.size
    h = sigmoid.sigmoid(X.dot(theta.reshape(-1, 1)))
    grad = (1 / m) * X.T.dot(h - y)
    return grad.flatten()
                   jac=True, method='TNC', options=options)
cost = res.fun
theta = res.x

print('Cost at theta found by optimize.minimize: {:.3f}'.format(cost))
print('Expected cost (approx): 0.203')
print('Theta: {:.3f}, {:.3f}, {:.3f}'.format(*theta))
print('Expected theta (approx): -25.161, 0.206, 0.201')

# Plot decision boundary
plotDecisionBoundary(theta, X_padded, y)

# Predict probability for a student with score 45 on exam 1 and score 85 on exam 2
grades = np.array([1, 45, 85])
prob = sigmoid(grades.dot(theta))
print('For a student with scores 45 and 85, '
      'we predict an admission probability of {:.3f}'.format(prob))
print('Expected value: 0.775 +/- 0.002')

# Compute accuracy on our training set
p = predict(theta, X_padded)
accuracy = np.mean(y == p) * 100
print('Train Accuracy: {}%'.format(accuracy))
print('Expected accuracy (approx): 89.0')
def cost_function(theta, X, y):
    m = y.size
    h = sigmoid.sigmoid(X.dot(theta))
    J = -1 * (1 / m) * (np.log(h + epsilon).T.dot(y) +
                        np.log(1 - h + epsilon).T.dot(1 - y))
    return J[0]
def compute2(x, y, theta):
    m = y.size
    h = sigmoid.sigmoid(x.dot(theta))
    j = -1 * (1 / m) * (np.log(h + epsilon).T.dot(y) +
                        np.log(1 - h + epsilon).T.dot(1 - y))
    return j[0]
def predict(theta, X, threshold=0.5):
    p = sigmoid.sigmoid(X.dot(theta.T)) >= threshold
    return p.astype('int')