def compute_error_per_sample(theta, x, y):
    """Return the per-sample linear regression error h_theta(x) - y."""
    assert(mathutil.is_np_1d_array(theta))
    assert(theta.size == 2)
    num_samples = x.size
    aug_x = get_aug_x(x)
    h_theta_x = np.sum(theta*aug_x, axis=1)
    error_per_sample = h_theta_x - y
    assert(mathutil.is_np_1d_array(error_per_sample))
    assert(error_per_sample.size == num_samples)
    return error_per_sample

def compute_gradient(theta, aug_x, y, lagrange_lambda):
    """Regularized logistic regression gradient.

    Note: the regularization pull is applied to every component of theta,
    including the bias term, which matches compute_cost below.
    """
    n_samples, n_features = aug_x.shape
    assert(np.ndim(theta) == 1)
    assert(np.size(theta) == n_features)
    inner_term = np.dot(aug_x, theta)
    hyp_h_theta = mathutil.sigmoid_fn(inner_term)
    assert(mathutil.is_np_1d_array(y))
    assert(mathutil.is_np_1d_array(hyp_h_theta))
    jac = (1/n_samples) * np.dot(aug_x.transpose(), (hyp_h_theta - y))
    assert(np.ndim(jac) == 1)
    assert(np.size(jac) == n_features)
    regularization_pull = (lagrange_lambda/n_samples)*theta
    jac_with_regularization = jac + regularization_pull
    assert(not np.any(np.isnan(jac_with_regularization)))
    return jac_with_regularization

def compute_linear_regression_gradient(theta, x, y, lagrange_lambda):
    """
    Compute the linear regression gradient with the regularization pull
    (the bias term theta[0] is not regularized)

    .. math::
        \\frac{\\partial J(\\theta)}{\\partial \\theta_0} =
            \\frac{1}{m} \\sum_{i=1}^{m} (h_\\theta(x^{(i)}) - y^{(i)}) x_0^{(i)}
            \\quad \\text{for } j = 0

        \\frac{\\partial J(\\theta)}{\\partial \\theta_j} =
            \\frac{1}{m} \\sum_{i=1}^{m} (h_\\theta(x^{(i)}) - y^{(i)}) x_j^{(i)}
            + \\frac{\\lambda}{m} \\theta_j \\quad \\text{for } j \\ge 1

    :param theta: the hyper-plane params
    :type theta: np array
    :param x: input samples (1-D); the bias column is added internally via get_aug_x
    :type x: np array
    :param y: output
    :type y: np array
    :param lagrange_lambda: regularization strength
    :type lagrange_lambda: float
    """
    error_per_sample = compute_error_per_sample(theta, x, y)
    num_samples = x.size
    aug_x = get_aug_x(x)
    inner_term = error_per_sample[:, np.newaxis] * aug_x
    summed_inner_term = np.sum(inner_term, axis=0)
    assert(mathutil.is_np_1d_array(summed_inner_term))
    assert(summed_inner_term.size == 2)
    jacobian = (1/num_samples)*summed_inner_term
    # only the slope theta[1] gets the regularization pull; the bias theta[0] is left alone
    jacobian_with_reg_pull = jacobian + np.array([0.0, lagrange_lambda*theta[1]/num_samples])
    return jacobian_with_reg_pull

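# A minimal finite-difference check against compute_linear_regression_gradient; a sketch, not part
# of the training path. It assumes compute_linear_regression_cost(theta, x, y, lagrange_lambda) is
# available in this module, as used by generate_learning_curve below.
def check_linear_regression_gradient(theta, x, y, lagrange_lambda, epsilon=1e-4):
    analytic_grad = compute_linear_regression_gradient(theta, x, y, lagrange_lambda)
    numeric_grad = np.zeros_like(analytic_grad)
    for j in range(theta.size):
        theta_plus = theta.copy()
        theta_minus = theta.copy()
        theta_plus[j] += epsilon
        theta_minus[j] -= epsilon
        # central difference of the cost approximates the j-th partial derivative
        numeric_grad[j] = (compute_linear_regression_cost(theta_plus, x, y, lagrange_lambda)
                           - compute_linear_regression_cost(theta_minus, x, y, lagrange_lambda)) / (2*epsilon)
    # the analytic and numeric gradients should agree to within O(epsilon**2)
    return np.max(np.abs(analytic_grad - numeric_grad))
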
def compute_cost(theta, aug_x, y, lagrange_lambda):
    """Regularized logistic regression cost (cross-entropy plus an L2 penalty).

    Note: the penalty includes the bias term, consistent with compute_gradient above.
    """
    n_samples, n_features = aug_x.shape
    assert(np.ndim(theta) == 1)
    assert(np.size(theta) == n_features)
    with np.errstate(over='raise'):
        inner_term = np.dot(aug_x, theta)
        hyp_h_theta = mathutil.sigmoid_fn(inner_term)  # h_theta(x)
        assert(mathutil.is_np_1d_array(y))
        assert(mathutil.is_np_1d_array(hyp_h_theta))
        cost_per_sample = -y*np.log(hyp_h_theta) \
                          - (1-y)*np.log(1-hyp_h_theta)
        assert(not np.any(np.isnan(cost_per_sample)))
        average_cost = (1/n_samples)*np.sum(cost_per_sample)
        cost_plus_regularization = average_cost + \
            (lagrange_lambda/(2*n_samples))*np.sum(theta**2)
        assert(not np.isnan(cost_plus_regularization))
    return cost_plus_regularization

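# A sketch of how compute_cost and compute_gradient can be handed to an off-the-shelf optimizer.
# The function name and the choice of scipy.optimize.minimize with BFGS are illustrative
# assumptions; a similar trainer may already exist elsewhere in this module.
def train_logistic_regression(aug_x, y, lagrange_lambda):
    import scipy.optimize
    n_features = aug_x.shape[1]
    initial_theta = np.zeros(n_features, dtype=np.float64)
    result = scipy.optimize.minimize(
        fun=compute_cost,
        x0=initial_theta,
        args=(aug_x, y, lagrange_lambda),
        jac=compute_gradient,
        method='BFGS')
    return result.x
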
def get_theta_transfers_from_flattened_version(theta_transfers_flattened, theta_transfer_shapes):
    """Reshape a flat parameter vector back into the list of per-layer theta matrices."""
    offset = 0
    assert(mathutil.is_np_1d_array(theta_transfers_flattened))
    theta_transfers = []
    for theta_transfer_shape in theta_transfer_shapes:
        r, c = theta_transfer_shape
        theta_transfer = theta_transfers_flattened[offset:(offset + r*c)].reshape(r, c)
        theta_transfers.append(theta_transfer)
        offset += r*c
    assert(offset == theta_transfers_flattened.size)
    return theta_transfers

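# A small round-trip sketch for get_theta_transfers_from_flattened_version. The row-major
# flattening shown here is an assumption about how the flat vector is produced by the caller.
def _demo_theta_transfer_round_trip():
    theta_transfers = [np.arange(6, dtype=np.float64).reshape(2, 3),
                       np.arange(12, dtype=np.float64).reshape(3, 4)]
    flattened = np.concatenate([t.ravel() for t in theta_transfers])
    shapes = [t.shape for t in theta_transfers]
    recovered = get_theta_transfers_from_flattened_version(flattened, shapes)
    assert(all(np.array_equal(a, b) for a, b in zip(theta_transfers, recovered)))
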
def multivariate_gaussian(X, mu, sigma_sq):
    """Evaluate the multivariate Gaussian density at each row of X."""
    if mathutil.is_np_1d_array(sigma_sq):
        # a 1-D sigma_sq is treated as the diagonal of the covariance matrix
        sigma_sq = np.diag(sigma_sq)
    r, c = sigma_sq.shape
    assert(r == c)
    inv_sigma_sq = np.linalg.inv(sigma_sq)
    X_minus_mu = X - mu
    exp_term_0 = np.dot(X_minus_mu, inv_sigma_sq)
    exp_term = np.sum(exp_term_0 * X_minus_mu, axis=1)
    # the quadratic form is scaled by -0.5 in the exponent of the Gaussian density
    dist = (2*np.pi)**(-r/2) * np.linalg.det(sigma_sq)**(-0.5) * np.exp(-0.5*exp_term)
    return dist

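# A usage sketch for multivariate_gaussian in an anomaly-detection setting: fit mu and a diagonal
# sigma_sq from the data and flag samples whose density falls below a threshold. The epsilon
# default is an illustrative placeholder, not a tuned value.
def flag_anomalies(X, epsilon=1e-3):
    mu = np.mean(X, axis=0)
    sigma_sq = np.var(X, axis=0)              # one variance per feature (diagonal covariance)
    densities = multivariate_gaussian(X, mu, sigma_sq)
    return densities < epsilon                # boolean mask of suspected anomalies
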
def run_feedforward_nn_for_sample(theta_matrix_1, theta_matrix_2, sample):
    """Forward-propagate one sample through the 3-layer network and return the predicted (1-indexed) label."""
    assert(mathutil.is_np_1d_array(sample))
    n_features = np.size(sample)
    output_dim_1, input_dim_1 = theta_matrix_1.shape
    output_dim_2, input_dim_2 = theta_matrix_2.shape
    assert(n_features + 1 == input_dim_1)
    a_1 = np.concatenate(([1.0], sample))            # input layer with the bias unit prepended
    z_2 = np.dot(theta_matrix_1, a_1)
    a_withoutbias_2 = mathutil.sigmoid_fn(z_2)
    a_2 = np.concatenate(([1.0], a_withoutbias_2))   # hidden layer with the bias unit prepended
    assert(np.size(a_2) == input_dim_2)
    z_3 = np.dot(theta_matrix_2, a_2)
    predicted_output_vector = mathutil.sigmoid_fn(z_3)
    return np.argmax(predicted_output_vector) + 1    # class labels are 1-indexed

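# A sketch of scoring a whole data set with run_feedforward_nn_for_sample, where X is an
# (n_samples, n_features) matrix and y holds the true 1-indexed labels. The per-sample loop
# mirrors the function above; a fully vectorized forward pass would be faster but is not shown.
def compute_nn_accuracy(theta_matrix_1, theta_matrix_2, X, y):
    predictions = np.array([run_feedforward_nn_for_sample(theta_matrix_1, theta_matrix_2, sample)
                            for sample in X])
    return np.mean(predictions == y)
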
def generate_learning_curve(x, y, x_cv, y_cv):
    """
    Generates learning curves by sweeping across the size of the training set
    """
    assert(mathutil.is_np_1d_array(x))
    num_samples = x.size
    assert(num_samples > 0)
    training_set_size = np.arange(1, num_samples)
    # errors are floats, so do not inherit the integer dtype of training_set_size
    training_error = np.empty_like(training_set_size, dtype=np.float64)
    cv_error = np.empty_like(training_set_size, dtype=np.float64)
    for idx, curr_training_set_size in enumerate(training_set_size):
        x_train = x[:curr_training_set_size]
        y_train = y[:curr_training_set_size]
        initial_theta = np.ones((2), dtype=np.float64)
        trained_theta = train_linear_regression(initial_theta, x_train, y_train, 1.0)
        # the reported errors use lambda = 0 so the regularization term is excluded
        training_error[idx] = compute_linear_regression_cost(trained_theta, x_train, y_train, 0.0)
        cv_error[idx] = compute_linear_regression_cost(trained_theta, x_cv, y_cv, 0.0)
    plt_learning_curve(training_set_size, training_error, cv_error)

def unflatten_X_Theta(X_Theta_flattened, num_users, num_movies, num_features):
    """Split the flat collaborative filtering parameter vector into X (movie features) and Theta (user parameters)."""
    assert(mathutil.is_np_1d_array(X_Theta_flattened))
    assert(X_Theta_flattened.size == (num_users + num_movies)*num_features)
    X = X_Theta_flattened[0:(num_movies*num_features)].reshape((num_movies, num_features))
    Theta = X_Theta_flattened[(num_movies*num_features):].reshape((num_users, num_features))
    return X, Theta

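# The packing counterpart of unflatten_X_Theta, shown as a sketch: movie features first, then
# user parameters, so an optimizer can treat both as a single flat variable. A similar packer
# presumably exists wherever the flattened vector is created.
def flatten_X_Theta(X, Theta):
    return np.concatenate((X.ravel(), Theta.ravel()))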