def fit(self, trainset):
    AlgoBase.fit(self, trainset)
    n = trainset.n_users
    m = trainset.n_items
    #print(n,m)
    # Known-ratings mask K and rating matrix R
    self.K = agnp.zeros((n, m))
    self.R = agnp.zeros((n, m))
    for u, i, rating in trainset.all_ratings():
        ru, ri = self.add_to_known(u, i)
        self.K[ru, ri] = 1
        self.R[ru, ri] = rating
    # Random initialisation of user and item latent factors
    self.U = agnp.random.normal(size=(n, self.latent_dimension))
    self.M = agnp.random.normal(size=(self.latent_dimension, m))
    # One-hot category membership matrix for the items
    self.C = agnp.array([[self.cat_products[self.from_known_ri(ri)] == c
                          for c in range(len(self.cat_target))]
                         for ri in range(m)])
    # Regularised squared-error objective, plus a penalty pushing the mean
    # predicted rating per category towards cat_target
    self.fun_U = lambda U: (agnp.sum(self.K * (self.R - agnp.dot(U, self.M)) ** 2)
                            + self.mu * (agnp.sum(U ** 2) + agnp.sum(self.M ** 2))
                            + self.lamb * agnp.sum((1 / n * agnp.dot(agnp.dot(agnp.ones(n), agnp.dot(U, self.M)), self.C)
                                                    - self.cat_target) ** 2))
    self.fun_M = lambda M: (agnp.sum(self.K * (self.R - agnp.dot(self.U, M)) ** 2)
                            + self.mu * (agnp.sum(self.U ** 2) + agnp.sum(M ** 2))
                            + self.lamb * agnp.sum((1 / n * agnp.dot(agnp.dot(agnp.ones(n), agnp.dot(self.U, M)), self.C)
                                                    - self.cat_target) ** 2))
    self.grad_U = grad(self.fun_U)
    self.grad_M = grad(self.fun_M)
    # Alternate one gradient step on M and on U, decaying the learning rate
    for epoch in range(self.nb_main_epochs):
        self.M = gradient_descent(self.M, self.grad_M, N=1, lr=self.lr, alpha=1)
        self.U = gradient_descent(self.U, self.grad_U, N=1, lr=self.lr, alpha=1)
        self.lr *= self.alpha
    return self
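# A minimal sketch of the gradient_descent helper assumed by fit() above: a plain
# step rule x <- x - lr * grad(x), repeated N times with a per-step decay factor
# alpha. This is an assumption inferred from the call sites, not the original code.
def gradient_descent(x, grad_fn, N=1, lr=0.01, alpha=1.0):
    for _ in range(N):
        x = x - lr * grad_fn(x)   # move against the gradient
        lr *= alpha               # optional within-call learning-rate decay
    return x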
def k_fold_function(t_rows, t_cols, X, y):
    #alpha_list=[0.001,0.00001]
    min_rmse = np.inf
    #min_alpha=0
    k_folds = 10
    k_fold_test_rows = int(t_rows / k_folds)
    k_fold_train_rows = t_rows - k_fold_test_rows
    for i in range(-6, 1):
        #print(alpha)
        rmse_array = []
        for k_fold in range(0, k_folds):
            X_test = X[:k_fold_test_rows]
            y_test = y[:k_fold_test_rows]
            X_train = X[k_fold_test_rows:t_rows]
            y_train = y[k_fold_test_rows:t_rows]
            w = gradient_descent(X_train, y_train, k_fold_train_rows, t_cols, 10**i)
            #print(w)
            rmse_array.append(find_rmse_gradient_descent(X_test, y_test, w))
            X_new = np.concatenate((X_train, X_test))
            y_new = np.concatenate((y_train, y_test))
            X = np.copy(X_new)
            y = np.copy(y_new)
        avg_rmse = np.average(rmse_array)
        if (math.isnan(avg_rmse)):
            avg_rmse = np.inf
        if (avg_rmse <= min_rmse):
            min_rmse = avg_rmse
            min_alpha = 10**i
    return (min_alpha, min_rmse)
def main():
    # Set parameters
    degree = 20
    eta = 2
    max_iter = 200

    # Load data and expand with polynomial features
    f = open('data_logreg.json', 'r')
    data = json.load(f)
    for k, v in data.items():
        data[k] = np.array(v)  # Encode list into numpy array

    # Expand with polynomial features
    X_train = logreg_toolbox.poly_2D_design_matrix(data['x1_train'], data['x2_train'], degree)
    n = X_train.shape[1]

    # Define the functions of the parameter we want to optimize
    def f(theta):
        return lr.cost(theta, X_train, data['y_train'])

    def df(theta):
        return lr.grad(theta, X_train, data['y_train'])

    # Test to verify if the computation of the gradient is correct
    logreg_toolbox.check_gradient(f, df, n)

    # Point for initialization of gradient descent
    theta0 = np.zeros(n)

    theta_opt, E_list = gd.gradient_descent(f, df, theta0, eta, max_iter)

    logreg_toolbox.plot_logreg(data, degree, theta_opt, E_list)
    plt.show()
def getdata():
    reader = csv.reader(open("data2.csv", "rt"), delimiter=",")
    x = list(reader)
    result = np.array(x).astype("float")
    t_rows, t_cols = result.shape
    X = np.delete(result, t_cols - 1, 1)
    y = np.delete(result, np.s_[0:t_cols - 1], axis=1)
    X = np.power(X, 8)
    X = np.insert(X, 0, 1, axis=1)

    # Normalization
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
    mean = np.zeros((X_train.shape[1], 1), dtype=float)
    std_dev = np.zeros((X_train.shape[1], 1), dtype=float)
    for i in range(1, X_train.shape[1]):
        mean[i] = np.mean(X_train[:, i])
        std_dev[i] = np.std(X_train[:, i])
    for i in range(1, X_train.shape[1]):
        X_train[:, i] = (np.subtract(X_train[:, i], mean[i])) / std_dev[i]
    for i in range(1, X_test.shape[1]):
        X_test[:, i] = np.subtract(X_test[:, i], mean[i]) / std_dev[i]

    #plot_curve(X_train,y_train)
    [alpha, rmse] = k_fold_function(X_train.shape[0], t_cols, X_train, y_train)
    print(alpha)
    w = gradient_descent(X_train, y_train, X_train.shape[0], t_cols, alpha)
    #print(w)
    np.savetxt("w_curve_fitting_8.csv", w, delimiter=",")
    rmse = find_rmse_gradient_descent(X_test, y_test, w)
    print("Final RMSE for 30% data is {}".format(rmse))
def train_classifier(X, y, num_labels, L, iterations, alpha):
    # get n for Theta size
    n = X.shape[1]
    # Theta is the matrix where we will store the theta values of each classifier we train
    Theta = np.zeros((num_labels, n))
    # J_histories is the matrix of the J_history for each classifier we train
    J_histories = np.zeros((num_labels, iterations))
    # train Theta for each class
    for i in range(0, num_labels):
        # get class
        c = i + 1
        # preprocess y to be binary where 1 is when y = c
        y_processed = y.copy()
        for t in range(y.shape[0]):
            y_processed[t] = 1 if y[t] == c else 0
        # initialize theta
        initial_theta = np.zeros((n, 1))
        # train theta for this class
        theta, J_history = gradient_descent(X, y_processed, initial_theta, alpha, iterations)
        # add this theta to Theta matrix
        Theta[i, :] = np.array(theta.T[0, :])
        # add this J_history to the J_histories matrix
        J_histories[i, :] = J_history
    return Theta, J_histories
def train(data_mat, label_mat, epoches, alpha):
    max_iter_count = epoches
    step_alpha = alpha
    theta, cost_v = gradient_descent(data_mat, label_mat, step_alpha, max_iter_count)
    # theta, cost_v = normal_quations(data_mat, label_mat)
    return theta, cost_v
def main():
    try:
        data = np.loadtxt(open(the_file, 'r'), delimiter=',')
    except:
        print('Failed to load data')
        sys.exit(1)
    X = np.matrix(data[:, 0]).transpose()
    y = np.matrix(data[:, 1]).transpose()
    # add column of ones:
    X = np.insert(X, 0, values=1, axis=1)
    theta = np.matrix('0;0')
    alpha = .01
    iters = 1500
    gradient_descent.gradient_descent(X, y, theta, alpha, iters)
def train_model(x, y, ax):
    # gradient descent settings
    (_, n) = x.shape
    iters = 1500
    alpha = 0.01
    theta = np.zeros(n)

    # compute and display initial cost
    print('Testing the cost function ...\n')
    j = compute_cost.compute_cost(x, y, theta)
    print(' With theta = [0.0, 0.0]')
    print(' Cost computed = %0.2f' % j)
    print(' Expected cost value (approx) 32.07\n')

    # run gradient descent
    print('Running Gradient Descent ...\n')
    (theta, j_history) = gradient_descent.gradient_descent(x, y, theta, alpha, iters)
    print(' Theta found by gradient descent:')
    print(' ', theta)
    print(' Expected theta values (approx):')
    print(' [-3.6303, 1.1664]\n')

    return (alpha, theta, j_history)
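# A hypothetical reference for the gradient_descent module used in train_model() above:
# batch gradient descent for linear regression returning the fitted theta and the cost
# at each iteration. The update rule and return shape are assumptions based on the call.
import numpy as np

def gradient_descent(x, y, theta, alpha, iters):
    m = len(y)
    j_history = np.zeros(iters)
    for it in range(iters):
        error = x @ theta - y                        # residuals before the update
        theta = theta - (alpha / m) * (x.T @ error)  # batch gradient step
        j_history[it] = np.sum((x @ theta - y) ** 2) / (2 * m)  # cost after the step
    return theta, j_history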
def runOnFolds(network, partitions):
    accuracies = np.zeros((5, 1))
    precisions = np.zeros((5, 1))
    recalls = np.zeros((5, 1))
    # we iteratively concatenate folds together, leaving out a different
    # fold each time
    for x in xrange(0, 5):
        first_part = True
        test_set = partitions[x]
        for y in xrange(0, x):
            if first_part:
                training_set = partitions[y]
                first_part = False
            else:
                training_set = np.concatenate([training_set, partitions[y]])
        for z in xrange(x + 1, 5):
            if first_part:
                training_set = partitions[z]
                first_part = False
            else:
                training_set = np.concatenate([training_set, partitions[z]])
        # build the tree, get key values, find the average accuracy
        input_weights, hidden_weights = gradient_descent(network, training_set)
        accuracy, precision, recall = get_stats(test_set, input_weights, hidden_weights)
        accuracies[x] = accuracy
        precisions[x] = precision
        recalls[x] = recall
    return accuracies, precisions, recalls
def run_gradient_descent(f, v_start, dx=1e-6, max_steps=1000):
    print('===')
    v_min = gradient_descent(f, v_start, dx, max_steps)
    print(f'v_start: {v_start}')
    print(f'v_min: {v_min}')
    print("===")
    return v_min
def main(X, Y_list, lr, n_g_iter):
    x = X
    for _ in range(n_g_iter):
        f, g = _single_gradient_step(x, Y_list)
        print('\t\t Gradient Iteration %d: Energy = %f' % (_, f))
        x = gradient_descent(x, g, lr=lr, norm=True)
    return x
def run_ros2(step, point_type):
    # Initial point
    if point_type == "const":
        x0 = np.array([-1.2, 1])
    else:
        x0 = np.random.uniform(-4, 2, 2)
        print("Random initial point: ", x0)
    # Min point
    x_min = np.array([1, 1])
    # Max number of iterations
    mxitr = 10000
    # Tolerance for gradient
    tol_g = 1e-8
    # Tolerance for x
    tol_x = 1e-8
    # Tolerance for function
    tol_f = 1e-8
    # Method for step update
    if step == "fijo":
        msg = "StepFijo"
    elif step == "hess":
        msg = "StepHess"
    else:
        msg = "Backtracking"
    # Gradient step size for "StepFijo" method
    step_size = 2e-3
    # Estimate minimum point through gradient descent
    xs = gradient_descent(x0, mxitr, tol_g, tol_x, tol_f, f_rosenbrock_2,
                          g_rosenbrock_2, msg, H_rosenbrock_2, step_size)
    # Print point x found and function value f(x)
    print("\nPoint x found: ", xs[-1])
    print("\nf(x) = ", f_rosenbrock_2(xs[-1]))
    # Plot level sets and gradient path
    plot_level_set(xs, f_rosenbrock_2, -5.0, 2.0, -8.0, 8.0, x0, x_min)
def run_ros100(step, point_type):
    # Initial point
    if point_type == "const":
        x0 = np.ones(100)
        x0[0] = -1.2
        x0[-2] = -1.2
    else:
        x0 = np.random.uniform(-2, 2, 100)
        print("Random initial point: ", x0)
    # Min point
    x_min = np.ones(100)
    # Max number of iterations
    mxitr = 10000
    # Tolerance for gradient
    tol_g = 1e-8
    # Tolerance for x
    tol_x = 1e-8
    # Tolerance for function
    tol_f = 1e-8
    # Method for step update
    if step == "fijo":
        msg = "StepFijo"
    elif step == "hess":
        msg = "StepHess"
    else:
        msg = "Backtracking"
    # Gradient step size for "StepFijo" method
    step_size = 1e-5
    # Estimate minimum point through gradient descent
    xs = gradient_descent(x0, mxitr, tol_g, tol_x, tol_f, f_rosenbrock_100,
                          g_rosenbrock_100, msg, H_rosenbrock_100, step_size)
    # Print point x found and function value f(x)
    print("\nPoint x found: ", xs[-1])
    print("\nf(x) = ", f_rosenbrock_100(xs[-1]))
def run_wood(step, point_type):
    # Initial point
    if point_type == "const":
        x0 = np.array([-3, -1, -3, -1])
    else:
        x0 = np.random.uniform(-2, 2, 4)
        print("Random initial point: ", x0)
    # Min point
    x_min = np.array([1, 1, 1, 1])
    # Max number of iterations
    mxitr = 10000
    # Tolerance for gradient
    tol_g = 1e-8
    # Tolerance for x
    tol_x = 1e-8
    # Tolerance for function
    tol_f = 1e-10
    # Method for step update
    if step == "fijo":
        msg = "StepFijo"
    elif step == "hess":
        msg = "StepHess"
    else:
        msg = "Backtracking"
    # Gradient step size for "StepFijo" method
    step_size = 5e-6
    # Estimate minimum point through gradient descent
    xs = gradient_descent(x0, mxitr, tol_g, tol_x, tol_f, f_wood, g_wood, msg, H_wood, step_size)
    # Print point x found and function value f(x)
    print("\nPoint x found: ", xs[-1])
    print("\nf(x) = ", f_wood(xs[-1]))
def train(X, y, alpha, lam, iter_num):
    k_, k = y.shape
    m, n = X.shape
    all_theta = np.matrix(np.zeros((k, n)))
    for i in range(iter_num):
        for j in range(k):
            theta = all_theta[j, :]
            theta = gradient_descent(theta, X, y[:, j], alpha, lam)
            all_theta[j, :] = theta
    return all_theta
def fit_gradient_descent():
    """
    Uses Gradient Descent (GD) to fit the ball parameters.

    :return theta: array containing the initial speed and the acceleration factor due to rolling friction.
    :rtype theta: numpy.array.
    :return history: history of points visited by the algorithm.
    :rtype history: list of numpy.array.
    """
    theta, history = gradient_descent(cost_function, gradient_function, np.array([0.0, 0.0]),
                                      0.1, 1.0e-10, 1000)
    return theta, history
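# A sketch of the optimiser interface fit_gradient_descent() appears to expect:
# gradient_descent(cost_function, gradient_function, theta0, alpha, epsilon, max_iterations)
# returning the final parameters and the list of visited points. The stopping rule
# (gradient norm below epsilon) is an assumption, not taken from the original code.
import numpy as np

def gradient_descent(cost_function, gradient_function, theta0, alpha, epsilon, max_iterations):
    # cost_function is accepted to match the call signature; this sketch only uses the gradient
    theta = np.array(theta0, dtype=float)
    history = [theta.copy()]
    for _ in range(max_iterations):
        g = gradient_function(theta)
        if np.linalg.norm(g) < epsilon:   # assumed convergence test
            break
        theta = theta - alpha * g
        history.append(theta.copy())
    return theta, history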
def run_gradient_descent(feature_matrix, output_colvec, num_examples, num_features,
                         alpha, num_iters, fig, subplot,
                         theta_colvec=None, normal_eq=False, debug=False):
    """Run Gradient Descent/Normal Equation.

    1) num_examples - number of training samples
    2) num_features - number of features
    3) feature_matrix - num_examples x (num_features + 1)
    4) output_colvec - num_examples x 1 col vector
    5) alpha - alpha value for gradient descent
    6) num_iters - number of iterations
    7) theta_colvec - (num_features + 1) x 1 col vector
                      initial values of theta
    8) debug - print debug info
    """
    print('Running Gradient Descent ...')

    if not theta_colvec:
        theta_colvec = np.zeros(shape=(num_features + 1, 1))

    cost_hist = None
    if normal_eq:
        theta_colvec = normal_equation(feature_matrix, output_colvec)
        print(f'Theta found by normal equation : {theta_colvec}')
    else:
        theta_colvec, cost_hist = gradient_descent(feature_matrix, output_colvec,
                                                   num_examples, num_features,
                                                   alpha, num_iters, theta_colvec,
                                                   debug=debug)
        print(f'Theta found by gradient descent: {theta_colvec}')

    if num_features == 1:
        line_plot(feature_matrix[:, 1], feature_matrix @ theta_colvec,
                  marker='x', label='Linear regression', color='b',
                  markersize=2, fig=fig, subplot=subplot)
        util.pause('Program paused. Press enter to continue.')

    return theta_colvec, cost_hist
def fit_iso(name, dist, plot=1):
    if name != 'Pal5':
        clus_data = fi.read_data("noU_NGC_"+name+"_cluster.csv", ",")
    else:
        clus_data = fi.read_data("noU_"+name+"_cluster.csv", ",")
    if dist < 10.0:
        high = 7.0
    elif dist < 20.0:
        high = 6.0
    else:
        high = 5.5
    clus = format_data(clus_data, dist, cuts=(3.5, high))
    iso_data, age = [], []
    for i in range(len(mod)):
        try:
            filename = "./Giso_shifted/"+name+mod[i]+".dat"
            iso_data.append(fi.read_data(filename))
            age.append(d_age[i])
        except IOError:
            print "!!! File not found - {0}".format(filename)
            continue
    """ Get R-squared values for each isochrone"""
    iso, RR = [], []
    for i in range(len(iso_data)):
        iso.append(format_isochrone(iso_data[i], cuts=(7.0, 3.5)))
        results = poly.poly_fit(0.0, iso[i][:,0], iso[i][:,1], iso[i][:,2], 6, verbose=0)
        RR.append(R_squared(clus, results))
    points = sc.array(zip(age, RR))
    max = sc.ma.max(points[:,1])
    RRmod = -1.0*(points[:,1] - max)
    sigma = 0.1*sc.ones(len(RRmod))
    c, d, start = mc.do_MCMC(func.gaussian_function, sc.array([0.1, 0.0, 0.2]),
                             sc.array([0.001, 0.001, 0.001]), points[:,0], RRmod, sigma,
                             "test", number_steps=10000, save=0)
    best = gd.gradient_descent(func.gaussian_function, start, points[:,0], RRmod, sigma)
    # One-dimensional errors
    if len(RR) != 5:
        error = np.sqrt(abs((8.0*0.2*0.2) / (2.0*(RR[-1]-RR[0]))))  # Uses last point, which should be the '-2'
    else:
        error = np.sqrt(abs((8.0*0.2*0.2) / (RR[2] + RR[4] - 2.0*RR[0])))  # Hessian error for one parameter fit
    # Plot these bad boys
    if plot == 1:
        plt.figure(1)
        plt.subplot(211)
        plt.errorbar(clus[:,0], clus[:,1], yerr=clus[:,2], fmt='o')
        for i in range(len(iso)):
            plt.plot(iso[i][:,0], (iso[i][:,1]))
        plt.subplot(212)
        plt.scatter(points[:,0], points[:,1])
        x = sc.arange(plt.xlim()[0], plt.xlim()[1], 0.01)
        y = -1.0*(func.gaussian_function(x, best)) + max  # func.gaussian_function(x, best)
        plt.plot(x, y, 'g-')
        plt.savefig(name+"_isoAn2.ps", papertype='letter')
        plt.close('all')
    return (sc.array(zip(age, RR)), best, error)
def compare_with_diff_size():
    """
    Estimate the same model with both algorithms under different data sizes.
    """
    re = []
    dimension = 20
    model = create_linear_model(dimension)
    for i in range(1, 11):
        num = 10000 * i
        X, Y = generate_linear_data(dimension, num)
        # Estimate the model with (batch) gradient descent
        start_time = timeit.default_timer()
        gradient_descent(X, Y, model)
        end_time = timeit.default_timer()
        gd_time = end_time - start_time
        # Estimate the model with stochastic gradient descent
        start_time = timeit.default_timer()
        stochastic_gradient_descent(X, Y, model)
        end_time = timeit.default_timer()
        sgd_time = end_time - start_time
        re.append((num, gd_time, sgd_time))
    return re
def learn(X, y, alpha, num_iters):
    # Adding intercept term to X
    X = np.insert(X, 0, 1, axis=1)

    # Getting size of training data
    training_size, feature_size = X.shape

    # Initializing learning parameters
    theta = np.zeros((feature_size, 1))

    theta, J_history = gradient_descent.gradient_descent(
        X, y, theta, alpha, num_iters, compute_cost)

    # plotting the cost in each iteration
    # TODO Need to add label to x as 'Number of iterations' and y as 'Cost J'
    plt.plot(J_history)
    # plt.show()

    return theta
def calibrate_neural_network(K, M, p, sigma, sigma_prime, y, x,
                             gradient_descent_method, **kwargs):
    """
    Calibrates a neural network with one hidden layer for classification.

    Args:
        K (int): Number of classes for classification
        M (int): Number of neurons in the hidden layer
        p (int): Number of features in `x`
        sigma (function: array-like to array-like): Activation function (e.g. sigmoid function).
        sigma_prime (function: array-like to array-like): Derivative of sigma.
        y (1d NumPy array): Y observations in the training set
        x (2d NumPy array): X observations in the training set
        gradient_descent_method (string): Selects the gradient descent method.
            The available methods are: "Gradient Descent" and "Mini-batch Gradient Descent".
        **kwargs: arguments passed to the optimiser

    Returns:
        array-like: returns the approximated value of x such that f(x) = 0 given
        the algorithm interruption conditions.
    """
    y_ = np.zeros((len(y), 10))
    for i in range(10):
        y_[y == i, i] = 1
    y = y_

    # Setting up the initial guess
    x0 = np.zeros((p + 1) * M + (M + 1) * K)
    alpha = np.eye(p, M)
    beta = np.eye(M, K)
    x0[M:(p + 1) * M] = alpha.reshape(M * p, order='F')
    x0[(p + 1) * M + K:] = beta.reshape(K * M, order='F')

    # Setting up the gradient functions
    gradient_function = (lambda theta:
                         neural_network_gradient(x, y, sigma, sigma_prime,
                                                 *theta_map(theta, p, M, K)))
    sampled_gradient_function = (lambda theta, sample_bool:
                                 neural_network_gradient(x[sample_bool, :], y[sample_bool],
                                                         sigma, sigma_prime,
                                                         *theta_map(theta, p, M, K)))

    # Performing the optimisation
    if gradient_descent_method == "Gradient Descent":
        return gradient_descent(x0, gradient_function, **kwargs)
    if gradient_descent_method == "Mini-batch Gradient Descent":
        return mini_batch_gradient_descent(x0, x.shape[0], sampled_gradient_function, **kwargs)
    return None
def get_optimal_value(LS_oracle, args):
    if LS_oracle.get_strong_convexity() < 1e-6:
        # if A^T A is not invertible - get f* by GD
        x0 = sample_uniform_ball(args.r, args.d)
        beta = LS_oracle.get_smoothness()
        gradient_iterates = gradient_descent(x0,
                                             grad_func=LS_oracle.get_grad,
                                             step_size=1 / beta,
                                             max_steps=10 * args.num_grad_steps,
                                             proj=lambda x: l2_ball_proj(x, args.R))
        return LS_oracle.get_value(gradient_iterates[-1])
    else:
        return LS_oracle.get_value(LS_oracle.get_analytic_solution())
def main():
    # Set parameters
    degree = 15
    eta = 10
    max_iter = 500

    # Load data and expand with polynomial features
    f = open('data_logreg.json', 'r')
    data = json.load(f)
    for k, v in data.items():
        data[k] = np.array(v)  # Encode list into numpy array

    # Expand with polynomial features
    X_train = logreg_toolbox.poly_2D_design_matrix(data['x1_train'], data['x2_train'], degree)
    n = X_train.shape[1]

    # Define the functions of the parameter we want to optimize
    def f(theta):
        return lr.cost(theta, X_train, data['y_train'])

    def df(theta):
        return lr.grad(theta, X_train, data['y_train'])

    # Test to verify if the computation of the gradient is correct
    logreg_toolbox.check_gradient(f, df, n)

    # Point for initialization of gradient descent
    theta0 = np.zeros(n)

    #### VARIANT 1: Optimize with gradient descent
    theta_opt, E_list = gd.gradient_descent(f, df, theta0, eta, max_iter)

    #### VARIANT 2: Optimize with adaptative gradient descent
    """
    theta_opt, E_list, lr_list = gd.adaptative_gradient_descent(f, df, theta0, eta, max_iter)
    plt.plot(lr_list)
    plt.xlabel('Iterations')
    plt.ylabel('Learning rate')
    print('Adaptative gradient, final learning rate: {:.3g}'.format(lr_list[-1]))
    """

    #### VARIANT 3: Optimize with scipy's minimize
    #res = minimize(f, x0=theta0, jac=df, options={'disp': True ,'maxiter': max_iter})
    #theta_opt = res.x.reshape((n, 1))
    #E_list = []

    logreg_toolbox.plot_logreg(data, degree, theta_opt, E_list)
    plt.show()
def runOnFull(network, examples):
    accuracies = np.zeros((1, 1))
    precisions = np.zeros((1, 1))
    recalls = np.zeros((1, 1))
    input_weights, hidden_weights = gradient_descent(network, examples)
    accuracy, precision, recall = get_stats(examples, input_weights, hidden_weights)
    accuracies[0] = accuracy
    precisions[0] = precision
    recalls[0] = recall
    return accuracies, precisions, recalls
def execute(self):
    # run awwwwwwwwwwwwwwway
    r = rospy.Rate(5)
    while not rospy.is_shutdown():
        old, new = self.lazer_sub.get_scans()
        guess_del = self.odom_sub.get_deltas()
        if old is not None and new is not None and guess_del is not None:
            angle = tf.transformations.euler_from_quaternion(self.current_quat)[2]
            res_func = lambda vect: rsd.residual(old, new, vect[0, 0], vect[1, 0], vect[2, 0], angle)
            actual_del = grd.gradient_descent(res_func, guess_del)
            self.compute_new_pose(actual_del)
            self.publish_pose()
        r.sleep()
def test_square():
    def fun_square(x):
        return np.linalg.norm(x) ** 2

    def grad_square(x):
        return 2.0 * x

    random_state = np.random.RandomState(42)
    x = gradient_descent(x0=random_state.randn(5), alpha=0.1, grad=grad_square,
                         n_iter=100, return_path=False)
    f = fun_square(x)
    assert_almost_equal(f, 0.0)
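# Minimal sketch of a gradient_descent matching the keyword interface exercised by
# test_square() above and plot_square() further down (x0, alpha, grad, n_iter,
# return_path). This is an assumed implementation for illustration, not the module
# actually under test.
import numpy as np

def gradient_descent(x0, alpha, grad, n_iter, return_path=False):
    x = np.asarray(x0, dtype=float)
    path = [x.copy()]
    for _ in range(n_iter):
        x = x - alpha * grad(x)   # constant step size
        path.append(x.copy())
    return (x, path) if return_path else x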
def train_neural_network(data: np.ndarray, nodes_per_layer: List[int], **kwargs) -> NeuralNetwork:
    # Initialize our weights to random small values.
    epsilon = 0.1
    # Decide the shapes of the adjacency matrices. If the first layer has 2 nodes, the second has 3
    # nodes, and the last has 1 node, then the shapes should be [(2+1, 3), (3+1, 1)]. (The +1 is
    # because of the bias layer.)
    Θ_shapes = list(zip(nodes_per_layer, nodes_per_layer[1:]))
    for i in range(len(Θ_shapes)):
        m, n = Θ_shapes[i]
        Θ_shapes[i] = (m + 1, n)
    initial_Θs = [
        np.random.uniform(low=-epsilon, high=epsilon, size=shape)
        for shape in Θ_shapes
    ]
    return gradient_descent(data, NeuralNetwork, J, grad_J, initial_Θs, **kwargs)
def get_normal_errors(function, best_params, hist_in, sigma, steps, points=10):
    #replace hist_in with x,y
    print '#---Calculating errors'
    spread = 2 * points + 1
    error_points = sc.zeros((len(best_params), spread), float)
    for i in range(len(best_params)):
        error_points[i, 0] = best_params[i]
        for j in range(points):
            error_points[i, j + 1] = best_params[i] - (j + 1) * steps[i]
        for j in range(points):
            error_points[i, j + points + 1] = best_params[i] + (j + 1) * steps[i]
    #get R-squared values
    Rsquared_points = sc.zeros((error_points.shape), float)
    l, w = error_points.shape
    for i in range(l):
        for j in range(w):
            test_params = sc.zeros(len(best_params))
            for k in range(len(best_params)):
                test_params[k] = best_params[k]
            test_params[i] = error_points[i, j]
            Rsquared_points[i, j] = R_squared_gauss(hist_in, test_params)
            """Rsquared_points[i,j] = R_squared(function, test_params,x,y,sigma)"""
    #Find gaussian fit to each parameter - maybe only fit sigma?
    error_out = []
    for i in range(len(best_params)):
        error_x = error_points[i]
        error_y = Rsquared_points[i]
        error_y = -1.0 * (error_y / np.ma.max(error_y)) + 1.0
        error_sigma = sc.ones(len(error_x))
        error_params = sc.array([best_params[i], steps[i]])
        error_fit = gd.gradient_descent(gaussian_function, error_params, error_x, error_y, error_sigma)
        #error_fit = gfe.Gradient_Descent(sc.array([error_x, error_y]), error_params)
        print '#---Error for parameter #', i
        print '#-parameter:', error_x
        print '#-R-squared:', error_y
        print '#-Error fit mean:', error_fit[0], ', sigma:', error_fit[1]
        error_out.append(error_fit)
    return sc.array(error_out)
def main():
    # Set parameters
    degree = 1
    eta = 1
    max_iter = 20

    # Load data and expand with polynomial features
    f = open('data.json', 'r')
    data = json.load(f)
    for k, v in data.items():
        data[k] = np.array(v)  # Encode list into numpy array

    # Expand with polynomial features
    X_train = toolbox.poly_2D_design_matrix(data['x1_train'], data['x2_train'], degree)
    n = X_train.shape[1]

    # Define the functions of the parameter we want to optimize
    def f(theta):
        return lr.cost(theta, X_train, data['y_train'])

    def df(theta):
        return lr.grad(theta, X_train, data['y_train'])

    # Test to verify if the computation of the gradient is correct
    toolbox.check_gradient(f, df, n)

    # Point for initialization of gradient descent
    theta0 = np.zeros(n)

    #### VARIANT 1: Optimize with gradient descent
    theta_opt, E_list = gd.gradient_descent(f, df, theta0, eta, max_iter)

    #### VARIANT 2: Optimize with adaptative gradient descent
    # theta_opt, E_list, l_rate_final = gd.adaptative_gradient_descent(f, df, theta0, eta, max_iter)
    # print('Adaptative gradient, final learning rate: {:.3g}'.format(l_rate_final))

    #### VARIANT 3: Optimize with scipy's minimize
    # res = minimize(f, x0=theta0, jac=df, options={'disp': True})
    # theta_opt = res.x.reshape((n, 1))
    # E_list = []

    toolbox.plot_logreg(data, degree, theta_opt, E_list)
    plt.show()
def nn(initial_pos, desired_shape, assignment):
    x = []
    y = []  # target list (not initialised in the original excerpt)
    for i, j in assignment:
        x.append(initial_pos[i])
        x.append(desired_shape[j])
        # NOTE: x_star is assumed to be defined elsewhere in the original module
        y.append(gradient_descent.gradient_descent(initial_pos, desired_shape, x_star))
    model = Sequential()
    model.add(Input(21))
    model.add(Dense(30, activation='relu'))
    model.add(Dense(20, activation='relu'))
    model.add(Dense(3, activation='softmax'))
    model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])
    model.fit(x, y, epochs=1200, verbose=1)
    model.save('model.h5')
def plot_square():
    def fun_square(x):
        return np.linalg.norm(x) ** 2

    def grad_square(x):
        return 2.0 * x

    x, path = gradient_descent(x0=np.ones(1), alpha=0.7, grad=grad_square,
                               n_iter=5, return_path=True)
    f = fun_square(x)
    plt.plot(path, [fun_square(x) for x in path], "-o", label="Gradient Descent Path")
    plt.plot(np.linspace(-1, 1, 1000),
             [fun_square(x) for x in np.linspace(-1, 1, 1000)],
             label="$f(x) = x^2$")
    plt.xlabel("$x$")
    plt.ylabel("$f(x)$")
    plt.legend(loc="best")
    plt.show()
start_params = sc.array([5.0, -1.0, 1.0, 2.5, 1.0, 0.5])  #A function that gives a good start is nice here
steps = sc.array([0.1, 0.01, 0.01, 0.1, 0.01, 0.01])  #1/10 of expected parameter order is usually good

""" fit data """
MCMC_fit, errors, best_fit = mc.do_MCMC(function, start_params, steps, x_col, y_col, sig_col,
                                        name=identifier, number_steps=10000, save=1)
best_fit = gd.gradient_descent(function, best_fit, x_col, y_col, sig_col)
print '#---GD Best fit:', best_fit

#plot data and fit
header = [(identifier[0] + str(best_fit)), identifier[1], identifier[2]]
if (pdf.plot_function(x_col, y_col, function, best_fit, save_name=identifier[0],
                      title=[header[0], r'$radius$', r'$counts$'], save=1) == 1):
    print '#---Plotting successful'

#Get Errors
errors = he.get_hessian_errors(function, best_fit, x_col, y_col, sig_col,
def two_variable_function(x, y):
    return (x-1)**2 + (y-1)**3

def three_variable_function(x, y, z):
    return (x-1)**2 + (y-1)**3 + (z-1)**4

def six_variable_function(x1, x2, x3, x4, x5, x6):
    return (x1-1)**2 + (x2-1)**3 + (x3-1)**4 + x4 + 2*x5 + 3*x6

functions = [single_variable_function, two_variable_function, three_variable_function, six_variable_function]
gradients = [[-2.0000000000000018],
             [-2.0000000000000018, 3.0001000000000055],
             [-2.0000000000000018, 3.0001000000000055, -4.0004000000000035],
             [-2.0000000000000018, 3.0001000000000055, -4.0004000000000035, 1.0000000000000009, 2.0000000000000018, 3.0000000000000027]]
minims = [[0.0020000000000000018],
          [0.0020000000000000018, -0.0030001000000000055],
          [0.0020000000000000018, -0.0030001000000000055, 0.004000400000000004],
          [0.0020000000000000018, -0.0030001000000000055, 0.004000400000000004, -0.0010000000000000009, -0.0020000000000000018, -0.0030000000000000027]]
grids = [[[0, 0.25, 0.75]],
         [[0, 0.25, 0.75], [0.9, 1, 1.1]],
         [[0, 0.25, 0.75], [0.9, 1, 1.1], [0, 1, 2, 3]],
         [[0, 0.25, 0.75], [0.9, 1, 1.1], [0, 1, 2, 3], [-2, -1, 0, 1, 2], [-2, -1, 0, 1, 2], [-2, -1, 0, 1, 2]]]
grid_minims = [[0.75], [0.75, 0.9], [0.75, 0.9, 1], [0.75, 0.9, 1, -2, -2, -2]]

for f in functions:
    indx = functions.index(f)
    print('Gradient and Minim Test', indx + 1)
    minimizer = gradient_descent(f)
    grad = minimizer.compute_gradient(delta=0.01)
    minimizer.descend(scaling_factor=0.001, delta=0.01, num_steps=1)
    assert grad == gradients[indx], 'wrong gradient'
    assert minimizer.minim == minims[indx], 'wrong minim'
    print('passed')

for f in functions:
    indx = functions.index(f)
    print('Grid Search Test', indx + 1)
    minimizer = gradient_descent(f)
    minimizer.grid_search(grids[indx])
    assert minimizer.minim == grid_minims[indx], 'wrong grid minim'
    print('passed')
iterations = 1500
alpha = 0.01

print('\nTesting the cost function ...\n')
J = compute_cost(X, y, theta)
print('With theta = [0 ; 0]\nCost computed = %f\n' % J)
print('Expected cost value (approx) 32.07\n')
J = compute_cost(X, y, np.mat('-1 ; 2'))
print('With theta = [-1 ; 2]\nCost computed = %f\n' % J)
print('Expected cost value (approx) 54.24\n')

[theta, J_history] = gradient_descent(X, y, theta, alpha, iterations)
print('Theta found by gradient descent:\n')
print('%s\n' % theta)
print('Expected theta values (approx)\n')
print(' -3.6303\n 1.1664\n\n')

plot_data(X[:, 1], X.dot(theta), '-')
plt.show()

predict1 = np.array([1, 3.5]).dot(theta)
print('For population = 35,000, we predict a profit of %f\n' % np.sum(predict1*10000))
predict2 = np.array([1, 7]).dot(theta)
"""Linear regression.""" from gradient_descent import gradient_descent from numpy import dot def h(t, x): return dot(t, x) def J(t, x, y): """Cost function.""" m = len(x) return sum((h(t, x[i]) - y[i])**2 for i in range(m)) / 2 / m x = [[1, 0, 0], [1, 2, 1], [1, 3, 2], [1, 4, 3]] y = [1, 6, 9, 12] # y = 1 + 2 * x1 + x2 t0 = [0, 1, 2] print gradient_descent(h, x, y, t0) # t converges to [1, 2, 1]
from __future__ import division

from sklearn import datasets
import pandas as pd
import numpy as np

from gradient_descent import gradient_descent

data = datasets.load_iris()
X = data.data[:100, :2]
y = data.target[:100]
X_full = data.data[:100, :]
shape = X.shape[1]
y_flip = np.logical_not(y)
y = np.array(y_flip) * 1
betas = np.zeros(shape)
fitted_values, cost_iter = gradient_descent(betas, X, y)
print fitted_values
def PMF(input_matrix, approx=50, iterations=30, learning_rate=.001, regularization_rate=.1):
    A = input_matrix
    Z = np.asarray(A > 0, dtype=np.int)
    A1d = np.ravel(A)
    mean = np.mean(A1d)
    scale = np.max(A1d) - np.min(A1d)
    A = A - mean
    K = approx
    R = itr = iterations
    l = learning_rate
    b = regularization_rate
    N = A.shape[0]
    M = A.shape[1]
    U = np.random.randn(N, K)
    V = np.random.randn(K, M)
    opt = {"alg": "python"}
    #opt = {"alg": "cython"}
    #opt = {"alg": "inline"}
    if opt["alg"] == "python":
        for r in range(R):
            for i in range(N):
                for j in range(M):
                    if Z[i, j] > 0:
                        e = A[i, j] - np.dot(U[i, :], V[:, j])
                        U[i, :] = U[i, :] + l * (e * V[:, j] - b * U[i, :])
                        V[:, j] = V[:, j] + l * (e * U[i, :] - b * V[:, j])
        A_ = np.dot(U, V)
    elif opt["alg"] == "cython":
        import gradient_descent
        A_ = gradient_descent.gradient_descent(A, U, V, Z, K, R, N, M, l, b)
    elif opt["alg"] == "inline":
        #http://technicaldiscovery.blogspot.com/2011/06/speeding-up-python-numpy-cython-and.html
        #This code has overflow issues on some data... buyer beware
        from scipy.weave import inline
        from scipy.weave import converters
        weave_options = {'extra_compile_args': ['-O3'], 'compiler': 'gcc'}
        code = r"""
            int r,i,j,k;
            double e;
            for(r=0; r<R; r++){
                for(i=0; i<N; i++){
                    for(j=0; j<M; j++){
                        for(k=0; k<K; k++){
                            if(Z(i,j)){
                                e = A(i,j)-(U(i,k)*V(k,j));
                                U(i,k) = U(i,k) + l*(e*V(k,j)-b*U(i,k));
                                V(k,j) = V(k,j) + l*(e*U(i,k)-b*V(k,j));
                            }
                        }
                    }
                }
            }
            """
        inline(code, ['A', 'K', 'R', 'l', 'b', 'N', 'M', 'U', 'V', 'Z'],
               type_converters=converters.blitz, auto_downcast=0, **weave_options)
        A_ = np.dot(U, V)
    return A_ + mean
        if data[j,2] > (mu+spread):
            continue
        if data[j,2] < (mu-spread):
            continue
        holder.append(data[j,2] - data[j,3])
    histogram = h.make_hist(holder, 0.01)
    h.plot_histogram(histogram, 0.01, name=('hist_'+str(i)), x_label=r'$(g-r)_0$')
    #h.plot_multiple_hist(histogram[:,0], [histogram[:,1]], 0.01, name=('hist_'+str(i)), x_label=r'$(g-r)_0$')
    #x_col, y_col, sig_col = histogram[:,0], (histogram[:,1]/len(holder)), (func.poisson_errors(histogram[:,1])/len(holder))
    x_col, y_col, sig_col = histogram[:,0], (histogram[:,1]), (func.poisson_errors(histogram[:,1]))
    #print x_col, y_col, sig_col
    start_params = sc.array([0.2, 0.1, sc.ma.max(y_col)])
    steps = sc.array([0.001, 0.001, 0.01])
    function = func.gaussian_function
    #Fit function
    MCMC_fit, errors, best_fit = mc.do_MCMC(function, start_params, steps, x_col, y_col, sig_col,
                                            name=('histfit_'+str(i)), number_steps=10000, save=0)
    best_fit = gd.gradient_descent(function, best_fit, x_col, y_col, sig_col)
    errors = he.get_hessian_errors(function, best_fit, x_col, y_col, sig_col, steps)
    turnoff.append([best_fit[0], best_fit[1], errors[0], errors[1]])
    # if (pdf.plot_function(x_col, y_col, function, best_fit, save_name=('histfitplot_'+str(i)),
    #                       title=['', r'$(g-r)_0$', 'Normalized Counts'], save=1) == 1):
    plt.figure()
    plt.scatter(x_col, y_col)
    func_x = sc.arange((sc.ma.min(x_col)*0.9), (sc.ma.max(x_col)*1.1), 0.01)
    func_y = function(func_x, best_fit)
    plt.plot(func_x, func_y)
    plt.savefig(('histfitplot_'+str(i)+'.ps'), papertype='letter')
    print "#-Fit dataplot saved"
#print turnoff
print turnoff
if f.write_data(sc.array(turnoff), 'turnoff_out_fixed.txt') == 1:
    print "#-All Results Successfully saved"
def test_gradient_descent(self):
    initial_guess = numpy.array([[3]])
    v = gradient_descent(self.parabola, initial_guess, abs_error=1e-10)
    x = v[0, 0]
    self.assertAlmostEqual(x, 1, delta=0.01)
"""Logistic regression.""" from gradient_descent import gradient_descent from math import exp, log from numpy import dot def h(t, x): """Hypothesis sigmoid function.""" return 1 / (1 + exp(-dot(t, x))) def J(t, x, y): """Simplified cost function.""" m = len(x) return -sum(y[i] * log(h(t, x[i])) + (1 - y[i]) * log(1 - h(t, x[i])) for i in range(m)) / m x = [[1, -2], [1, -1], [1, 1], [1, 2]] y = [0.12, 0.27, 0.73, 0.88] # y = [0, 0, 1, 1] t0 = [1, 2] print gradient_descent(h, x, y, t0) # Not working
pad = np.matrix(np.ones(m)).T
X = np.hstack([pad, X])
n = X.shape[1]  # this needs to be assigned AFTER the pad step

# Initial theta to feed into gradient descent
theta_init = np.matrix(np.zeros(n)).T

# Gradient descent params
alpha = 0.01
iters = 1500

# Run gradient descent
run_descent = gradient_descent(X=X, y=y, theta=theta_init,
                               cost_function=compute_cost, alpha=alpha, iters=iters)
theta = run_descent["theta"]
cost_hist = run_descent["cost_history"]
theta_hist = run_descent["theta_history"]

# predict for ex2data2
'''
x = np.matrix([1800, 4])
predict = predict_regression(x=x, theta=theta, mean=X_mean, std=X_std)
'''
def logistic_regression(train_X, train_y, test_X, test_y):
    print "Logistic Regression with Gradient Descent..."
    w = gd.gradient_descent(train_X, train_y, alpha, MAX_ITER, False)
    predicted = gd.predict_boolean(test_X, w, 0.4)
    acc = np.sum(predicted == test_y) / float(len(test_y))
    print "Logistic Regression, Accuracy: %f" % acc
        for y in self.possible_y_values:
            calculated_probs.append((self.p_of_y_given_x(weights, y, x), y))
        assert abs(sum([d[0] for d in calculated_probs]) - 1) < 1e-8
        return max(calculated_probs)[1]


###############################################################################
# Notes:

example_dataset = [(1, '+'), (2, '+'), (3, '+'), (4, '+'), (5, '-'), (6, '-'), (7, '-')]
review_loss_func = LogisticRegressionLoss(feature_function, example_dataset)
weights = gradient_descent([10, 10, 10, 10], .3, review_loss_func)

# for d in test_data:
#     prediction = review_loss_func.predict(d, weights)
#     print d, RAW[d], review_loss_func.p_of_y_given_x(weights, prediction, d), prediction

# print review_loss_func.predict(8, weights)

# for d in example_dataset:
#     print d, review_loss_func.predict(d[0], weights)
#     print d, review_loss_func.p_of_y_given_x(weights, d[1], d[0])