def train(self, features, labels, normalisedlabels=False, names=None, **kwargs):
    def error(bs):
        response = bs[0] + np.dot(features, bs[1:])
        response = _sigmoidal(response)
        diff = response - labels
        log_like = np.dot(diff, diff)
        L2_penalty = self.alpha * np.dot(bs, bs)
        return log_like + L2_penalty

    def error_prime(bs):
        fB = np.dot(features, bs[1:])
        response = _sigmoidal(bs[0] + fB)
        sprime = response * (1 - response)
        ds = (response - labels) * sprime
        b0p = np.sum(ds)
        b1p = np.dot(features.T, ds)
        bp = np.concatenate(([b0p], b1p))
        return 2. * (bp + self.alpha * bs)

    features = np.asanyarray(features)
    if not normalisedlabels:
        labels, _ = normaliselabels(labels)
    N, f = features.shape
    bs = np.zeros(f + 1)
    try:
        from scipy import optimize
        # Some testing revealed that this was a good combination:
        # call fmin_cg twice first and then fmin.
        # I do not understand why 100%, but there it is.
        bs = optimize.fmin_cg(error, bs, error_prime, disp=False)
        bs = optimize.fmin_cg(error, bs, error_prime, disp=False)
        bs = optimize.fmin(error, bs, disp=False)
    except ImportError:
        import warnings
        warnings.warn('''\
Could not import scipy.optimize.
Fall back to very simple gradient descent (which is slow).''')
        bs = np.zeros(f + 1)
        cur = 1.e-6
        ebs = error(bs)
        for i in xrange(1000000):
            dir = error_prime(bs)
            step = (lambda e: bs - e * dir)
            enbs = ebs + 1
            while enbs > ebs:
                cur /= 2.
                if cur == 0.:
                    break
                nbs = step(cur)
                enbs = error(nbs)
            while cur < 10.:
                cur *= 2
                nnbs = step(cur)
                ennbs = error(nnbs)
                if ennbs < enbs:
                    nbs = nnbs
                    enbs = ennbs
                else:
                    break
            bs = nbs
            ebs = enbs
    return logistic_model(bs)
def conjugate_gradient(x0, f, f_prime, hessian=None):
    all_x_i = [x0[0]]
    all_y_i = [x0[1]]
    all_f_i = [f(x0)]

    def store(X):
        x, y = X
        all_x_i.append(x)
        all_y_i.append(y)
        all_f_i.append(f(X))

    optimize.fmin_cg(f, x0, f_prime, callback=store, gtol=1e-12)
    return all_x_i, all_y_i, all_f_i
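# Hedged usage sketch for the path-recording helper above (not part of the
# original code): the quadratic below is a made-up test function, chosen only
# to show how the recorded iterates can be inspected.
import numpy as np
from scipy import optimize

def quad(X):
    x, y = X
    return (x - 1.0) ** 2 + 10.0 * (y + 2.0) ** 2

def quad_prime(X):
    x, y = X
    return np.array([2.0 * (x - 1.0), 20.0 * (y + 2.0)])

xs, ys, fs = conjugate_gradient(np.array([3.0, 3.0]), quad, quad_prime)
print(xs[-1], ys[-1], fs[-1])  # converges towards x=1, y=-2, f=0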
def logRegression():
    '''This data is simulated'''
    data = np.loadtxt('logregression.txt')
    y = data[:, 0]
    x = np.ones(data.shape)
    x[:, 1:] = data[:, 1:]

    def objective(b):
        '''Return -1*l(b), where l is the log likelihood.'''
        return np.log(1 + np.exp(np.dot(x, b))).sum() - (y * (np.dot(x, b))).sum()

    guess = np.array([1., 1., 1., 1.])
    fmin_cg(objective, guess)
def fit(self, X, Y):
    # save data and labels for plotting methods
    self.data = X
    self.labels = Y
    # get number of observations, features from data
    self.n_obs, self.n_features = X.shape
    # now make the weights attribute
    self.weights = np.random.rand(self.n_features)
    self.weights_history.append(self.weights)
    # use the scipy optimize Conjugate Gradient method
    optimize.fmin_cg(self.cost, self.weights, fprime=self.gradient, args=(X, Y))
def train(self, inputArr2D, targets, costFunc, costFuncGrad, maxIter=100): ''' This method will fit the weights of the neural network to the targets. :param inputArr2D: 1 input per row. :param targets: ground truth class label for each input :param costFunc: callable *f(paramToOptimize, \*arg)* that will be used as cost function. :param costFuncGrad: callable *f'(paramToOptimize, \*arg)* that will be used to compute partial derivative of cost function over each parameter in paramToOptimize. ''' self.forwardPropogateAllInput(inputArr2D) # perform forward propagation to set self.outputs avgEx = 1.0 / targets.shape[0] flatWeights = asarray(self.layersExOutputLy[0].forwardWeight) for ly in self.layersExOutputLy[1:]: ly.avgActvArrAllEx = avgEx * npsum(ly.self2D[:, :-1], 0) flatWeights = append(flatWeights, asarray(ly.forwardWeight)) fmin_cg(costFunc, flatWeights, costFuncGrad, (inputArr2D, targets, self.__weightDecayParam, self.__sparsity, self.__sparseParam, self), maxiter=maxIter, full_output=True) # fmin_cg calls grad before cost func
def train_network(X, Y, layers, regularization=0, max_iters=200):
    """
    Train a neural network and return the model.

    Args:
        X (array): data consisting of rows of features.
        Y (array): array of labels corresponding to each row in X. Must consist
            of integers from 0 to n for some integer n.
        layers (list): the number of features in each layer. The first entry
            must be the number of features (columns) in X, the last must be the
            number of classes, and those in between determine the size of each
            hidden layer.
        regularization (int): penalty factor for having larger weights
            (default: 0).
        max_iters (int): the max number of iterations used by the algorithm
            when searching for optimal weights. A higher number will produce a
            better fit but extends run time (default: 200).
    """
    check_input_validity(X, Y, layers)
    num_classes = layers[-1]
    network = NeuralNetwork(layers)
    initial_weights = flatten_weights(network.initialize_weights())
    Y = process_labels(Y, num_classes)
    optimal = fmin_cg(compute_cost, initial_weights, back_propogate,
                      args=(X, Y, network, regularization), maxiter=max_iters)
    forward_propogate(network.reshape_weights(optimal), X, network)
    network.weights = network.reshape_weights(optimal)
    return NeuralNetModel(network)
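# Hypothetical call of train_network (illustration only): X and Y are
# placeholders for a dataset with 400 feature columns and digit labels 0-9,
# with a single 25-unit hidden layer.
model = train_network(X, Y, layers=[400, 25, 10], regularization=1, max_iters=200)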
def read_new_file(): global Xtest, Ytest, Ysig2, target, X, Y, thetaArgs, theta, EI, yoffset, hyperprior filename = filestem + str(rng.randint(10)) + '.txt' print 'This is %s' % (filename) data = np.loadtxt(filename) Xtest = data[:,0:-1] # everything except the last column target = data[:,-1] # just the last column D = 1 # dimensionality of search space yoffset = 0.0 # to keep track of shifts up and down to y-axis initNumSamples = 2 # number of initial inputs #xlo,xhi =0.01,4.0 # just the plotting boundary, not a real constrait :( #Xtest = np.arange(xlo,xhi,(xhi-xlo)/100.0) # the inputs we'll keep track of. d = len(np.ravel(Xtest)) Xtest = Xtest.reshape((d,1)) # take the initial samples X = np.zeros((initNumSamples)) Y = np.zeros((initNumSamples,)) for s in range(initNumSamples): i = rng.randint(0,d) X[s] = Xtest[i] Y[s] = target[i] initX,initY = X,Y # Here we initialise hyperparameters, and make initial predictions. (init_theta, hyperprior) = gp.setAndSampleHyperprior(D) # initial hyperparams thetaArgs = (X.reshape(len(X),1),Y,hyperprior) # fmin_cg needs these all in one box. theta = fmin_cg(gp.calcNegLogPosterior,init_theta, gp.calcNegGradLogPosterior, [thetaArgs], gtol=1e-2,disp=0) Ytest,Ysig2 = gp.calcGPPrediction(theta,thetaArgs,Xtest) EI = calcEI(Ytest,Ysig2,np.max(Y))
def train(self, X, y, lmda):
    '''
    Train the neural network's parameters. Calls scipy's fmin_cg function
    for optimization.

    Arguments:
        X (m x n float matrix): Training examples.
        y (m 1d int array): Outputs of training examples.
        lmda (float): Lambda value for regularization.
    '''
    theta1 = self._rand_init_weights(self.input_layer_size, self.hidden_layer_size)
    theta2 = self._rand_init_weights(self.hidden_layer_size, self.output_layer_size)
    theta = self.unroll(theta1, theta2)
    args = (X, y, lmda)
    res = opt.fmin_cg(self._cost, x0=theta,
                      fprime=self._gradient_backpropagation,
                      args=args, maxiter=50, disp=False, full_output=True)
    # Save min cost and thetas.
    self.min_theta1, self.min_theta2 = self.roll(res[0])
    self.min_cost = res[1]
    print('cost {}'.format(self.min_cost))
def train_basis(basis, S, R, S_test, R_test, Mphi, Mrew, patience, max_iter,
                weighting, min_imp=1e-5):
    try:
        n_test_inc = 0
        best_test_loss = numpy.inf
        while (n_test_inc < patience):
            basis.set_params(
                fmin_cg(basis.loss, basis.flat_params, basis.grad,
                        args=(S, R, Mphi, Mrew),
                        full_output=False, maxiter=max_iter, gtol=1e-8))
            err = basis.loss(basis.flat_params, S_test, R_test, Mphi, Mrew)
            if err < (best_test_loss - min_imp):
                n_test_inc = 0
                print 'new best %s loss: %s' % (basis.loss_type, err)
            else:
                n_test_inc += 1
                logger.info('iters without better %s loss: %s' %
                            (basis.loss_type, n_test_inc))
            if err < best_test_loss:
                best_test_loss = err
                best_theta = copy.deepcopy(basis.theta)
        basis.theta = best_theta
    except KeyboardInterrupt:
        print '\n user stopped current training loop'
    return basis
def test_cg(self): # conjugate gradient optimization routine if self.use_wrapper: opts = {'maxiter': self.maxiter, 'disp': self.disp, 'return_all': False} res = optimize.minimize(self.func, self.startparams, args=(), method='CG', jac=self.grad, options=opts) params, fopt, func_calls, grad_calls, warnflag = \ res['x'], res['fun'], res['nfev'], res['njev'], res['status'] else: retval = optimize.fmin_cg(self.func, self.startparams, self.grad, (), maxiter=self.maxiter, full_output=True, disp=self.disp, retall=False) (params, fopt, func_calls, grad_calls, warnflag) = retval assert_allclose(self.func(params), self.func(self.solution), atol=1e-6) # Ensure that function call counts are 'known good'; these are from # Scipy 0.7.0. Don't allow them to increase. assert_(self.funccalls == 9, self.funccalls) assert_(self.gradcalls == 7, self.gradcalls) # Ensure that the function behaves the same; this is from Scipy 0.7.0 assert_allclose(self.trace[2:4], [[0, -0.5, 0.5], [0, -5.05700028e-01, 4.95985862e-01]], atol=1e-14, rtol=1e-7)
def _fit_cg(f, score, start_params, fargs, kwargs, disp=True, maxiter=100,
            callback=None, retall=False, full_output=True, hess=None):
    gtol = kwargs.setdefault('gtol', 1.0000000000000001e-05)
    norm = kwargs.setdefault('norm', np.Inf)
    epsilon = kwargs.setdefault('epsilon', 1.4901161193847656e-08)
    retvals = optimize.fmin_cg(f, start_params, score, gtol=gtol, norm=norm,
                               epsilon=epsilon, maxiter=maxiter,
                               full_output=full_output, disp=disp,
                               retall=retall, callback=callback)
    if full_output:
        if not retall:
            xopt, fopt, fcalls, gcalls, warnflag = retvals
        else:
            xopt, fopt, fcalls, gcalls, warnflag, allvecs = retvals
        converged = not warnflag
        retvals = {'fopt': fopt, 'fcalls': fcalls, 'gcalls': gcalls,
                   'warnflag': warnflag, 'converged': converged}
        if retall:
            retvals.update({'allvecs': allvecs})
    else:
        xopt = None
    return xopt, retvals
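# Hedged usage sketch of the _fit_cg wrapper above (not from the original
# module): a simple quadratic objective and its gradient stand in for a real
# likelihood and score function.
import numpy as np

def _quad_obj(params):
    return np.sum((params - 3.0) ** 2)

def _quad_score(params):
    return 2.0 * (params - 3.0)

xopt, info = _fit_cg(_quad_obj, _quad_score, np.zeros(2), fargs=(), kwargs={},
                     disp=False)
print(xopt, info['converged'])  # close to [3, 3], True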
def run_once(x0, x1):
    sol = fmin_cg(rosen, [x0, x1], retall=True, full_output=1)
    xy = numpy.asarray(sol[-1])
    sam.putarray('xy', xy)
    sam.eval("plot(xy(:,1),xy(:,2),'w-','LineWidth',2)")
    sam.eval("plot(xy(:,1),xy(:,2),'wo','MarkerSize',6)")
    return sol
def nnTrain(initial_theta, input_layer_size, hidden_layer_size, num_labels, X, y, lamda=0): """ Trains a neural network, returns theta1, theta2 Given: initial_theta: unrolled randomized theta that breaks symmetry nn layer sizes num_labels: number of output classes (equals K) X: m,n (DON'T include bias term in input X) y: m,K (vectorized representation for each y) lamda Returns: theta1, theta2 """ results = optimize.fmin_cg( nnCostFunction, fprime=nnGradientFunction, x0=initial_theta, args=(input_layer_size, hidden_layer_size, num_labels, X, y, lamda), maxiter=50, disp=False, full_output=True ) theta_optimized = results[0] min_cost = results[1] rolled_theta_optimized = rolltheta(theta_optimized, input_layer_size, hidden_layer_size, num_labels) return rolled_theta_optimized[0], rolled_theta_optimized[1]
def train(self, X, Theta, Y, R, lmda):
    '''
    Train the collaborative filtering model and keep the learned parameters
    in instance variables.

    Arguments:
        X (num_movies x num_features float): Matrix of movie features, where
            each row of X corresponds to the feature vector x[i] for the
            i-th movie.
        Theta (num_users x num_features float): Matrix of user features; the
            j-th row of Theta corresponds to one parameter vector theta[j]
            for the j-th user.
        Y (num_movies x num_users float): Stores ratings (from 1 to 5).
        R (num_movies x num_users int): Binary-valued indicator matrix, where
            R[i, j] == 1 if user j gave a rating to movie i and
            R[i, j] == 0 if user j didn't give a rating to movie i.
        lmda (float): Regularization parameter lambda.
    '''
    self.min_X = None
    self.min_Theta = None
    self.min_cost = None
    # Initialize theta and args for fmin_cg.
    params = self.unroll(X, Theta)
    args = (Y, R, lmda)
    res = opt.fmin_cg(self.cost, x0=params, fprime=self.gradient, args=args,
                      maxiter=100, disp=False, full_output=True)
    self.min_X, self.min_Theta = self.roll(res[0])
    self.min_cost = res[1]
def oneVsAll(self, X, y, num_labels, lmd): ''' Trains multiple logistic regression classifiers, on the same data. Training data will have positive data sets (matching digit being trained for) and negative data sets (other digits). Each training session uses all data to compute one row of theta matrix ''' m = y.shape[0] ones = np.ones(m) X = np.column_stack((ones, X)) n = X.shape[1] all_theta = np.zeros((num_labels, n)) for label in xrange(1, num_labels+1): match = (y == label) init_theta = np.zeros(n) # all_theta[label-1,:] = optimize.fmin_cg(self.lrCostFunction, fprime=self.lrGradFunction, x0=init_theta, args=(X, match, lmd), maxiter=200) all_theta[label-1,:] = optimize.fmin_cg(self.lrCostFunction, x0=init_theta, args=(X, match, lmd), maxiter=200) self.all_theta = all_theta return all_theta
def test_cg(self, use_wrapper=False): """ conjugate gradient optimization routine """ if use_wrapper: opts = {'maxit': self.maxiter, 'disp': False} params, info = optimize.minimize(self.func, self.startparams, args=(), method='CG', jac=self.grad, options=opts, full_output=True, retall=False) fopt, func_calls, grad_calls, warnflag = \ info['fun'], info['nfev'], info['njev'], info['status'] else: retval = optimize.fmin_cg(self.func, self.startparams, self.grad, (), maxiter=self.maxiter, full_output=True, disp=False, retall=False) (params, fopt, func_calls, grad_calls, warnflag) = retval err = abs(self.func(params) - self.func(self.solution)) #print "CG: Difference is: " + str(err) assert_(err < 1e-6) # Ensure that function call counts are 'known good'; these are from # Scipy 0.7.0. Don't allow them to increase. assert_(self.funccalls == 9, self.funccalls) assert_(self.gradcalls == 7, self.gradcalls) # Ensure that the function behaves the same; this is from Scipy 0.7.0 assert_(np.allclose(self.trace[2:4], [[0, -0.5, 0.5], [0, -5.05700028e-01, 4.95985862e-01]], atol=1e-14, rtol=1e-7), self.trace[2:4])
def linear_optimize(X, y, lamda): lamda = float(lamda) num_samples = float(len(y)) def cost_function(theta): regularization_term = (lamda / (2.0 * num_samples)) * \ np.sum(np.power(theta[1::], 2)) cost = (1.0 / (2.0 * num_samples)) * np.sum( np.power(np.subtract(np.dot(X, theta), y), 2)) + regularization_term return cost def gradient_function(theta): grad = np.zeros_like(theta) grad[0] = (1 / num_samples) * np.sum( np.subtract(np.dot(X, theta), y) * X[0::,0]) for i in range(1, len(grad)): reg_term = (lamda / num_samples) * theta[i] grad[i] = (1 / num_samples) * np.sum( np.subtract(np.dot(X, theta), y) * X[0::,i]) + reg_term return grad initial_theta = np.array(np.zeros((X.shape[1], 1)), dtype=np.float64) return fmin_cg( f=cost_function, x0=initial_theta, fprime=gradient_function, maxiter=200, disp=0)
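# Hedged usage sketch for linear_optimize above (not from the original
# source): toy data with an explicit bias column, since the gradient code
# treats X[:, 0] as the intercept term.
import numpy as np

Xb = np.column_stack([np.ones(50), np.linspace(0.0, 1.0, 50)])
yb = 2.0 + 3.0 * Xb[:, 1]
theta_hat = linear_optimize(Xb, yb, lamda=0.0)
print(theta_hat)  # expected to be close to [2, 3]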
def fit(self, x_n3, y_n3): trans_init = (y_n3.mean(axis=0) - x_n3.mean(axis=0))[:2] self_copy = deepcopy(self) def f(params): self_copy.set_params(params) xmapped_n3 = self_copy.transform_points(x_n3) return fit_score(xmapped_n3, y_n3,.5) # vals_params = [] # for rot_init in rot_inits: # opt_params, opt_val, _, _, _ = opt.fmin_cg(f, np.r_[trans_init],full_output=True) # vals_params.append((opt_val, opt_params)) # # # best_val, best_params = min(vals_params, key = lambda x:x[0]) best_params, best_val, _,_,_ = opt.fmin_cg(f, np.r_[trans_init], full_output=True) print "best_params:", best_params self.set_params(best_params) self.objective = best_val Globals.setup() draw_orig_new_warped_pcs(x_n3, y_n3, self.transform_points(x_n3))
def test(): data = np.loadtxt("data.txt") X = data[:,0:-1] # everything except the last column y = data[:,-1] # just the last column args = (X,y) #theta = np.array([ 1.7657065779589087, -1.3841332550882446, -10.162222605402242]) #theta = np.array([ 1.7999382115210827, -14.001391904643032 , -5.577578503745549]) theta = np.zeros(3) theta[0] = np.random.normal(0,5) theta[1] = np.random.normal(0,5) theta[2] = np.random.normal(0,5) print theta print np.exp(theta) print logPosterior(theta,args) print gradLogPosterior(theta,args) print so.check_grad(logPosterior, gradLogPosterior, theta, args) newTheta = so.fmin_cg(logPosterior, theta, fprime=gradLogPosterior, args=[args], gtol=1e-4,maxiter=100,disp=1) print newTheta, logPosterior(newTheta,args) K = kernel2(X,X,newTheta,wantderiv=False) L = np.linalg.cholesky(K) beta = np.linalg.solve(L.transpose(), np.linalg.solve(L,y)) test = X #pred = [predict(i,input,K,target,newTheta,L,beta) for i in input] #pred = np.squeeze([predict(i,input,K,target,newTheta,L,beta) for i in input]) demoplot(theta,args) demoplot(newTheta,args)
def unblur(y, msk, sigma, maxiter=20):
    """ y should be zero in the mask """
    cache = {'xf': None, 'res': None}
    nvoxels = np.sum(1 - msk)
    print nvoxels
    x = np.array(y)
    dom = np.logical_not(msk)

    def residual(xf):
        if xf is not cache['xf']:
            x[dom] = xf
            cache['res'] = blur(x, msk, sigma)[dom] - y[dom]
            cache['xf'] = xf
        return cache['res']

    def callback(xf):
        print error(xf)

    def error(xf):
        return .5 * np.sum(residual(xf) ** 2)

    xf = op.fmin_cg(error, x[dom], fprime=residual, maxiter=maxiter,
                    callback=callback)
    x[dom] = xf
    return x
def fit(self, x_n3, y_n3, prev_params=None): if prev_params is None: trans_init = y_n3.mean(axis=0) - x_n3.mean(axis=0) rot_inits = [(0, 0, 0)] else: trans_init = prev_params[:3] rot_inits = [prev_params[3:]] self_copy = deepcopy(self) def f(params): self_copy.set_params(params) xmapped_n3 = self_copy.transform_points(x_n3) return fit_score(xmapped_n3, y_n3,.5) vals_params = [] for rot_init in rot_inits: opt_params, opt_val, _, _, _ = opt.fmin_cg(f, np.r_[trans_init, rot_init],full_output=True) vals_params.append((opt_val, opt_params)) best_val, best_params = min(vals_params, key = lambda x:x[0]) print "best_params:", best_params self.set_params(best_params) self.objective = best_val Globals.setup() draw_orig_new_warped_pcs(x_n3, y_n3, self.transform_points(x_n3))
def fit(X, y, maxiter=50, method='TNC', lam=0.1):
    no_of_rows = X.shape[0]
    no_of_features = X.shape[1]
    no_of_labels = len(set(y))
    fit_theta = np.zeros((no_of_labels, no_of_features + 1))
    # adding a vector of ones to the X matrix (as the first column) - bias terms
    # for each training example
    X = np.insert(X, 0, 1, axis=1)
    initial_theta = np.zeros((no_of_features + 1, 1))
    for i in range(no_of_labels):
        # here labels are 0,1,2,3...; if they are 1,2,3,4..., use: temp_y = (y == (i+1)) + 0
        temp_y = (y == (i)) + 0
        # temp_y is a vector of size no_of_training_examples;
        # since each iteration corresponds to finding theta for a single class (one-vs-all),
        # each time we only take the prediction of class 'i' on all training examples
        _res = optimize.fmin_cg(returnJ, fprime=returnThetaGrad, x0=initial_theta,
                                args=(X, temp_y, lam), maxiter=50, disp=False,
                                full_output=True)
        fit_theta[i, :] = _res[0]
        """ different minimization functions (above and below) """
        #options = {'maxiter': maxiter}
        #_res = optimize.minimize(returnJ, initial_theta, jac=returnThetaGrad, method=method, args=(X, temp_y, lam), options=options)
        #fit_theta[i,:] = _res.x
    return fit_theta
def train(self, myiter=200):
    print "training..."
    init_theta = self.get_theta()
    theta = fmin_cg(self.cost, init_theta, args=(self.X, self.y, self.LAMBDA),
                    fprime=self.grad, maxiter=myiter)
    return theta
def optimize(self, theta, maxfun, revert=False): '''real executing function''' self.size = len(theta) self.revert = revert initials = [] index = 0 self.feature2index, self.index2feature = {}, {} for feature, value in theta.items(): self.feature2index[feature] = index self.index2feature[index] = feature initials.append(float(value)) index += 1 if self.method == "LBFGS": from scipy.optimize import fmin_l_bfgs_b # http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.fmin_l_bfgs_b.html (xopt, fopt, return_status) = fmin_l_bfgs_b(self.value_translator, initials, self.gradient_translator, pgtol=0.1, maxfun=maxfun) # print "============Optimization by LBFGS returns: ", return_status['task'] elif self.method == "CG": from scipy.optimize import fmin_cg # http://www.scipy.org/doc/api_docs/SciPy.optimize.optimize.html#fmin_cg (xopt, fopt, _, _, return_status) = fmin_cg(self.value_translator, initials, self.gradient_translator, full_output=1, disp=0) # print "============CG: ", return_status else: raise Exception("No optimization method defined!") self.size = None for i, x in enumerate(xopt): theta[self.index2feature[i]] = x return (fopt, theta, return_status)
def run_once(x0, x1):
    sol = fmin_cg(rosen, [x0, x1], retall=True, full_output=1)
    xy = numpy.asarray(sol[-1])
    pylab.plot(xy[:, 0], xy[:, 1], 'w-', linewidth=2)
    pylab.plot(xy[:, 0], xy[:, 1], 'wo', markersize=6)
    show()
    return sol
def fit(self, data, targets):
    if type(data) is not np.ndarray:
        data = np.array(data)
    if type(targets) is not np.ndarray:
        targets = np.array(targets)
    assert data.ndim == 2
    assert targets.ndim == 1
    assert len(data) == len(targets)

    # Turn 1D targets array into a 2D array
    targets_2D = np.zeros((self._layer_sizes[-1], len(targets)))
    for obs_number, label in enumerate(targets):
        # watch out! observations go down columns here
        targets_2D[label, obs_number] = 1
    data = data.transpose()

    def obj(Thetas_vec):
        return calc_cost(data, targets_2D, self._lmbda,
                         unflatten(Thetas_vec, self._layer_sizes))

    def obj_grad(Thetas_vec):
        curr_Thetas = unflatten(Thetas_vec, self._layer_sizes)
        targets_est, As, Zs = feedforward_full(curr_Thetas, data)
        deltas = backprop(curr_Thetas, Zs, targets_est, targets_2D)
        grads = calc_grads(deltas, As, self._lmbda, curr_Thetas)
        return flatten(grads)

    min_thetas = fmin_cg(obj, flatten(init_thetas), obj_grad, maxiter=max_iter)
    self._Thetas = min_thetas
    return self
def fit(Y, R, alpha, n): """ Fits the parameters of the collaborative filtering model Arguments ---------- Y: mxu rating matrix R: mxu i has been rated by j boolean matrix n: Number of features. alpha: regularization parameter controls model complexity. Return ---------- (X,Theta) X: mxn feature matrix Theta: uxn weight matrix """ m, u = Y.shape p = np.random.random((m + u) * n) # minimize cost function costf = lambda x: cost(x, Y, R, alpha) gradf = lambda x: grad(x, Y, R, alpha) p = fmin_cg(costf, p, fprime=gradf, maxiter=100, disp=False) # unroll parameters X = np.resize(p[:m * n], (m, n)) Theta = np.resize(p[m * n:], (u, n)) return (X, Theta)
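# Hedged usage sketch of fit above (illustration only): a tiny random rating
# matrix; the cost/grad functions referenced inside fit are assumed to be the
# module's own.
import numpy as np

Y_toy = np.random.randint(1, 6, size=(20, 5)).astype(float)
R_toy = (np.random.rand(20, 5) > 0.5).astype(float)
X_hat, Theta_hat = fit(Y_toy, R_toy, alpha=1.0, n=3)
print(X_hat.shape, Theta_hat.shape)  # (20, 3) and (5, 3)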
def train_cg(self, input, target, **kwargs): """ Train network with conjugate gradient algorithm. :Parameters: input : 2-D array Array of input patterns target : 2-D array Array of network targets maxiter : integer, optional Maximum number of iterations (default is 10000) disp : bool If True convergence method is displayed (default) .. seealso:: `scipy.optimize.fmin_cg` optimizer is used in this method. Look at its documentation for possible other useful parameters. """ if 'maxiter' not in kwargs: kwargs['maxiter'] = 10000 input, target = self._setnorm(input, target) func = netprop.func fprime = netprop.grad extra_args = (self.conec, self.bconecno, self.units, \ self.inno, self.outno, input, target) self.weights = optimize.fmin_cg(func, self.weights, fprime=fprime, \ args=extra_args, **kwargs) self.trained = 'cg'
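# Hypothetical call of train_cg (not from the original docs): `net` is assumed
# to be an instance of the surrounding network class, with one input pattern
# and one target per row.
net.train_cg(input_patterns, target_patterns, maxiter=5000, disp=1)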
def test_logreg_grad(self):
    ex1 = ml.logreg_grad(np.zeros(3), self.ex2data1[0], self.ex2data1[1])
    ex2 = optimize.fmin_cg(ml.logreg_cost, np.ones(3),
                           args=(self.ex2data1[0], self.ex2data1[1]),
                           fprime=ml.logreg_grad)
    self.assertEqual(round(np.sum(ex1), 1), -23.4)
    self.assertEqual(round(np.sum(ex2), 2), -24.75)
def train(self, lmda):
    '''
    Train using training examples; calls scipy's fmin_cg function for
    optimization.

    Arguments:
        lmda (float): Lambda value for regularization.

    Return:
        (float): Cost.
        (1d float array): Minimum thetas.
    '''
    # Reset before training.
    self._min_theta = None
    self._costs = None
    # Initialize theta and args for fmin_cg.
    theta = np.zeros(self._X.shape[0])
    args = (self._X, self._y, lmda)
    res = opt.fmin_cg(LinearRegressionRegularized._cost, x0=theta,
                      fprime=LinearRegressionRegularized._gradient,
                      args=args, maxiter=200, disp=False, full_output=True)
    self._min_theta = res[0]
    self._costs = res[1]
gradient[i][j] = derivative j += 1 i += 1 val = gradient.flatten() print(val.shape) return val Y, W, X_prim = generateData() W0 = np.ones([10, 2]) W0 = W0.flatten().T W0 = np.random.randn(20) W0 = W0.reshape((10, 2)).flatten().T args = Y W_star = opt.fmin_cg(f, W0, fprime=dfx, args=(Y, )) # x_star = A_star W_star = W_star.reshape((2, 10)).T inner = np.linalg.inv(np.dot(W_star.T, W_star)) X_approx = np.dot(Y, np.dot(W_star, inner)) print(X_approx[50:60]) print(X_prim[50:60]) line1, = plt.plot(X_approx[:, 0], X_approx[:, 1], label="Learned $X'$", color='b') line2, = plt.plot(X_prim[:, 0], X_prim[:, 1], label="True $X'$", color='r') fl = plt.legend(handles=[line1], loc=1) ax = plt.gca().add_artist(fl) plt.legend(handles=[line2], loc=4)
m, n = X_pf.shape theta_pf = np.zeros((n + 1, 1)) #print theta_pf.shape result = op.minimize(computeCost, theta_pf, method='TNC', jac=Gradient, args=(X_pf, Y), options={'maxiter': 200}) print result r = op.fmin_bfgs(computeCost2, theta_pf, args=(X_pf, Y), maxiter=40) print r print r[0] theta_pf = r print op.fmin_cg(computeCost2, theta_pf, maxiter=200, args=(X_pf, Y)) #theta_pf=result.x theta_pf = theta_pf.reshape((n + 1, 1)) #print theta_pf ''' ### Plotting the Linear Regulization line ### ''' scatter(X, Y, marker='x') plt.xlabel('Change in water levels(X)') plt.ylabel('Water flowing out of the dam(Y)') theta_pfR = np.zeros((1, 1)) theta_pfR = np.append(theta_pfR, theta_pf, axis=0) X_pfR = np.append(np.ones((X_pf.shape[0], 1)), X_pf, axis=1) x = np.arange(X.min() - 15, X.max() + 25, 0.05) x = x.reshape((x.size, 1)) x_poly = polymap(x, p)
def logistic_reg(x, y, theta, l=0, verbose=0, method='g'): """ Determines theta vector for a given polynomial degree and lambda x is a panda DataFrame y is a panda DataFrame l = 0: regularization coefficient / default is no regularization Methods for cost function minimization (default is gradient descent): 'g': gradient descent 'cg': conjugate gradient 'bfgs': BFGS (Broyden Fletcher Goldfarb Shanno) """ # Number of features n = x.shape[1] # Number of training set examples m = x.shape[0] # Number of classes K = y.shape[1] if len(theta[1]) != n + 1: print "In logistic_reg.py:\nproblem of dimension between number of features and number of parameters !!" print "Number of features:", n print "Length of theta vector:", len(theta[1]) sys.exit() for k in range(1, K + 1): theta[k] = np.array(theta[k], dtype=float) CF = CostFunction(x, y.values[:, k - 1], l) if verbose: if n == 1: from PdF_log_reg import hypothesis_function syn, hyp = hypothesis_function(x.min(), x.max(), theta[k]) plot_hyp_func(x, y[k], syn, hyp) if n == 2: plot_db(x, y[k], theta[k], lim=3, title='Initial decision boundary') if n == 3: plot_db_3d(x, y[k], theta[k], lim=3, title='Initial decision boundary') stop = 10**-3 if method == 'cg': # Conjugate gradient from scipy.optimize import fmin_cg theta[k], allvecs = fmin_cg(CF.compute_cost, theta[k], fprime=CF.compute_gradient, gtol=stop, disp=verbose, retall=True) elif method == 'bfgs': # BFGS (Broyden Fletcher Goldfarb Shanno) from scipy.optimize import fmin_bfgs theta[k], allvecs = fmin_bfgs(CF.compute_cost, theta[k], fprime=CF.compute_gradient, gtol=stop, disp=verbose, retall=True) elif method == 'g': # Gradient descent theta[k], min_cost = gradient_descent(CF, theta[k], opt=0) allvecs = None if verbose: if allvecs: min_cost = [] for vec in allvecs: min_cost.append(CF.compute_cost(vec)) nb_iter = len(min_cost) plot_cost_function(nb_iter, min_cost) plt.show() if verbose: if n == 1 and K == 1: from PdF_log_reg import hypothesis_function syn, hyp = hypothesis_function(x.min(), x.max(), theta[1]) plot_hyp_func(x, y[1], syn, hyp) if n == 2: if K != 1: plot_multiclass_2d(x, theta) else: plot_db(x, y, theta[1], title='Decision boundary') if n == 3: if K != 1: plot_multiclass_3d(x, theta) else: plot_db_3d(x, y, theta[1], title='Decision boundary') plt.show() return theta
hidden_layer_size, num_labels, Xtraining, ytraining, lam, returnType='J') gradFunc = lambda p: nnCostFunctionVec(p, input_layer_size, hidden_layer_size, num_labels, Xtraining, ytraining, lam, returnType='grad') nn_params = optimize.fmin_cg(costFunc, nn_params, fprime=gradFunc, maxiter=500) Theta1 = nn_params[0:hidden_layer_size * (input_layer_size + 1)].reshape( hidden_layer_size, input_layer_size + 1, order='F') Theta2 = nn_params[(hidden_layer_size * (input_layer_size + 1)):].reshape( num_labels, (hidden_layer_size + 1), order='F') displayData(Theta1[:, 1:]) pred = predict(Theta1, Theta2, Xtraining) pred = pred.reshape(pred.size, 1) trainingAccuracy = np.mean(pred == ytraining) * 100 print("Training accuracy: ", trainingAccuracy)
nIter = 100 nCall = 1e4 comsci = True # compare vs SciPy plotting = False # plot the result ### Solver call xs,fs,ct,Xs,it = cg(fcn,x0,nCall,\ lin=0,nIter=nIter) print "f(xs)=%f" % fs print "calls=%d" % ct print "iter=%d" % it # Scipy call if comsci == True: res = fmin_cg(fcn, x0, retall=True) ### Plotting if plotting == True: # Define meshgrid delta = 0.025 x = np.arange(min(x0[0],xstar[0])-0.5, \ max(x0[0],xstar[0])+0.5, delta) y = np.arange(min(x0[1],xstar[1])-0.5, \ max(x0[1],xstar[1])+0.5, delta) X, Y = np.meshgrid(x, y) dim = np.shape(X) # Compute function values Xv = X.flatten() Yv = Y.flatten() Input = zip(Xv, Yv)
    return J

def gradf(params, *args):
    X_train, y_train = args
    m, n = X_train.shape
    theta = params.reshape(-1, 1)
    h = out(X_train, theta)
    grad = np.zeros((X_train.shape[1], 1))
    grad = X_train.T.dot((h - y_train)) / m
    g = grad.ravel()
    return g

#res = optimize.minimize(f, x0=init_theta, args=args, method='BFGS', jac=gradf,
#                        options={'gtol': 1e-6, 'disp': True})
res = optimize.fmin_cg(f, x0=params, fprime=gradf, args=args, maxiter=500)
print(res)

# Visualise the linear decision boundary
label = np.array(y)
index_0 = np.where(label.ravel() == 0)
plt.scatter(X[index_0, 1], X[index_0, 2], marker='x',
            color='b', label='Not admitted', s=15)
index_1 = np.where(label.ravel() == 1)
plt.scatter(X[index_1, 1], X[index_1, 2], marker='o',
            color='r', label='Admitted', s=15)

# Show the decision boundary
x1 = np.arange(20, 100, 0.5)
x2 = (-res[0] - res[1] * x1) / res[2]
plt.plot(x1, x2, color='black')
grid_size = int((x_max - x_min) * resolution) grid = np.zeros((grid_size + 1, 2)) grid[0, 0] = np.rint(np.log(x_min / (a * (z**(-1.0 / 3.0)))) / b + 1) for i in np.arange(1, grid_size + 1): #Set box bounds grid[i, 0] = i * (x_max - x_min) / grid_size #UNITS: a.u. grid[i, 0] = np.log(grid[i, 0] / (a * (z**(-1.0 / 3.0)))) / b + 1 #Converts to g.p units grid[i, 0] = np.rint(grid[i, 0]) #Ensures integer values of g.p from scipy import optimize terms = len(coeff_vector) for jj in range(new_terms + 1): print 'entering jj' opt_coeffs = optimize.fmin_cg(err_check, coeff_vector, fprime=slope_find, epsilon=0.00001) coeff_vector = np.append(opt_coeffs, np.array([0.0])) print coeff_vector print 'final coeff_vector' print coeff_vector[0:-1] print 'final error = %s' % err_check(coeff_vector[0:-1]) print 'Total inconsistent slopes: %s' % inconsistent_slope_count[0] x = np.linspace(x_min, x_max, grid_size + 1) w = (x_max - x_min) / (grid_size + 2) plt.bar(x, grid[:, 1], width=w) #plt.title('Atomic #: ' + str(z) +' Er_stat=' + str(Er_stat)) plt.show()
for N_hdn in (1, 3, 10, 50):
    print("START:", N_hdn)
    w = np.random.randn((N_in + 1) * N_hdn + (N_hdn + 1) * N_out) * np.sqrt(10)

    # Stochastic gradient descent
    # Err_bk=0
    # Err=J(w,*(x_t,y_t))
    # i=0
    # while(np.fabs(Err-Err_bk)>1e-7 and i < 50000):
    #     w = w - 0.1*gradient(w,*(x_t,y_t))/(N_hdn)
    #     Err_bk=Err
    #     Err=J(w,*(x_t,y_t))
    #     if(i%500==0):print(i,Err)
    #     i+=1

    # Conjugate gradient method
    w = optimize.fmin_cg(J, w, fprime=gradient, args=(x_t, y_t))
    #w = optimize.fmin_bfgs(J, w, fprime=gradient, args=(x_t,y_t))
    #w = optimize.fmin_cg(J, w, args=args,gtol=0)
    w_l1 = w[:(N_in + 1) * N_hdn].reshape(N_in + 1, N_hdn)
    w_l2 = w[(N_in + 1) * N_hdn:].reshape(N_hdn + 1, N_out)

    # Plot
    xd = np.arange(-0., 1.001, 0.001)
    yd = np.zeros(xd.shape)
    zd = np.zeros([xd.shape[0], N_hdn + 1])
    plt.subplot(2, 2, grpNo)
    grpNo += 1
    plt.plot(x_t, y_t, 'o')
    for i in range(xd.shape[0]):
def train(X, y, reg):
    args = (X, y, reg)
    initial_theta = np.zeros((X.shape[1], 1))
    params = initial_theta.ravel()
    res = optimize.fmin_cg(f, x0=params, fprime=gradf, args=args, maxiter=500)
    return res
from models.optimizers import ConjugateGradientAlgorithm from functions import functionObj, exercise61 from scipy.optimize import fmin_cg import autograd.numpy as np x_0 = np.zeros(16) f_x = exercise61 print('-----------Non-linear Conjugate from Scipy-----------') f_x_obj = functionObj(f_x) x_min, f_min, _, _, _, = fmin_cg(f_x_obj, x_0, full_output=True) x_min = f_x_obj.best_x f_min = f_x_obj.best_f print('X: ', x_min) print('F: ', f_min) print('Function evals: %d' % (f_x_obj.fevals)) print('-----------ConjugateDescentAlgorithm-----------') f_x_obj = functionObj(f_x) opt = ConjugateGradientAlgorithm(f_x_obj, x_0, 1e3, xtol=1e-6) opt.find_min() x_min = f_x_obj.best_x._value f_min = f_x_obj.best_f._value print('X: ', x_min) print('F: ', f_min) print('Function evals: %d' % (f_x_obj.fevals))
# Train NN _lambda = 1 # Debug c = 0 def debug(_): global c c = c + 1 print("Iteration #{c}".format(**globals())) print('Training...') nn_params = fmin_cg( nn_cost_function(input_layer_size, hidden_layer_size, num_labels, X, Y, _lambda), init_nn_params, fprime=nn_gradients(input_layer_size, hidden_layer_size, num_labels, X, Y, _lambda), maxiter=50, callback=debug ) # Reshape theta_1 = nn_params[0:(hidden_layer_size*(input_layer_size+1))]. \ reshape([hidden_layer_size, input_layer_size+1]) theta_2 = nn_params[(hidden_layer_size*(input_layer_size+1)):]. \ reshape([num_labels, hidden_layer_size+1]) # Display data display_data(theta_1[:, 1:], save=True, file_name='2.png') # Estimate performance a = forward_prop(X)(theta_1, theta_2)['a'][-1]
point = [1234., -500., 10., 0.001] # both cg and nm does fine point = [1000, -100, 0, 1] # cg will do badly on this one # this will try nelder-mead from an unconverged DE solution #point = dstepmon.x[-150] # simplex, esow = Monitor(), Monitor() solver = fmin(len(point)) solver.SetInitialPoints(point) solver.SetEvaluationMonitor(esow) solver.SetGenerationMonitor(simplex) solver.Solve(cost_function, CRT()) sol = solver.Solution() print "\nsimplex solution: ", sol # solcg = fmin_cg(cost_function, point) print "\nConjugate-Gradient (Polak Rubiere) : ", solcg # if leastsq: sollsq = leastsq(vec_cost_function, point) sollsq = sollsq[0] print "\nLeast Squares (Levenberg Marquardt) : ", sollsq # legend = [ 'Noisy data', 'Differential Evolution', 'Nelder Mead', 'Polak Ribiere' ] plot_noisy_data() plot_sol(desol, 'r-') plot_sol(sol, 'k--') plot_sol(solcg, 'b-.') if leastsq:
def optimizeTheta(mytheta, myX, myy, mylambda=0.):
    result = optimize.fmin_cg(computeCost, fprime=costGradient, x0=mytheta,
                              args=(myX, myy, mylambda), maxiter=50, disp=False,
                              full_output=True)
    return result[0], result[1]
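# Hedged usage sketch for optimizeTheta above (illustration only): X and y are
# placeholders for the exercise's design matrix and labels, and the
# computeCost/costGradient functions referenced inside are assumed to come from
# the surrounding exercise code.
theta_opt, final_cost = optimizeTheta(np.zeros(X.shape[1]), X, y, mylambda=1.0)
print(final_cost)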
def recommender(): cur = mysql.connection.cursor() num_places = 48 reg_param = 30 #regularisation function #contains the ratings given by the users to different places cur.execute("SELECT * FROM ratings") rate = cur.fetchall() num = len(rate) ratings = [[0 for i in range(num)] for j in range(48)] #transpose the matrix for i in range(num): for j in range(48): ratings[j][i] = rate[i][j] num_users = num #did_rate checks if a user has rated a places did_rate = [[0 for i in range(num_users)] for j in range(num_places)] for i in range(num_places): for j in range(num_users): if ratings[i][j] != 0: did_rate[i][j] = 1 ratings = np.array(ratings) #print(did_rate) #print() #return str(ratings) #Normalize our data #Normalization makes the average of the data as a 0 ratings, ratings_mean = normalize_rat(ratings, did_rate) #return str(ratings_mean) #print(ratings) #update the number of users num_users = ratings.shape[1] num_features = 13 #how much of each feature is present cur.execute("SELECT * FROM place_features") place_features = cur.fetchall() place_features = np.array(place_features) place_features, place_mean = normalize(place_features) #print('place_features') #print(place_features) #print() #return str(place_features) #what kind of place a user would prefer cur.execute("SELECT * FROM user_prefs") user_prefs = cur.fetchall() user_prefs = np.array(user_prefs) user_prefs, user_mean = normalize(user_prefs) #print("user_prefs") #print(user_prefs) #return str(user_prefs) #X=place features and theta=user pref y=X*theta initial_X_and_theta = r_[place_features.T.flatten(), user_prefs.T.flatten()] #return str(initial_X_and_theta) #we are going to use gradient descent #performing gradient descent minimized_cost_and_optimal_params = optimize.fmin_cg( calculate_cost, fprime=calculate_gradient, x0=initial_X_and_theta, args=(ratings, did_rate, num_users, num_places, num_features, reg_param), maxiter=100, disp=True, full_output=True) #return str(minimized_cost_and_optimal_params) cost, optimal_place_features_and_user_prefs = minimized_cost_and_optimal_params[ 1], minimized_cost_and_optimal_params[0] #return str(optimal_place_features_and_user_prefs) place_features, user_prefs = unroll_params( optimal_place_features_and_user_prefs, num_users, num_places, num_features) #print(place_features) #return str(place_features) all_predictions = place_features.dot(user_prefs.T) #return str(ratings_mean) predictions_for_user = all_predictions[:, 0:1] + ratings_mean #print("Final ratings I would give to the Places:") #return str(all_predictions) final_output = [] for i in range(num_places): final_output.append([predictions_for_user[i], i + 1]) final_output.sort(reverse=True) #return str(final_output[1][0]) cur.execute("SELECT * FROM place_id") place_id = cur.fetchall() cur.execute("DROP TABLE results") cur.execute("CREATE TABLE results (place VARCHAR(32))") #return str(final_output) for i in range(10): cur.execute("SELECT place from place_id WHERE id =" + str(final_output[i][1])) #return str(final_output[i][1]) ans = cur.fetchone() #return str(ans) m = str(ans[0]) #return str(m) cur.execute("INSERT INTO results (place) VALUES(%s)", [str(m)]) #return str(pl) #print(final_output[i][0]," ",final_output[i][1]) #print(predictions_for_user) #print(len(predictions_for_user)) cur.execute("SELECT * FROM results") userDetails = cur.fetchall() mysql.connection.commit() cur.close() return render_template('users.html', userDetails=userDetails)
def minimize(func, x0, gradient=None, hessian=None, algorithm="default", verbose=False, **args): r""" This function is an interface to a variety of algorithms for computing the minimum of a function of several variables. INPUT: - ``func`` -- Either a symbolic function or a Python function whose argument is a tuple with `n` components - ``x0`` -- Initial point for finding minimum. - ``gradient`` -- Optional gradient function. This will be computed automatically for symbolic functions. For Python functions, it allows the use of algorithms requiring derivatives. It should accept a tuple of arguments and return a NumPy array containing the partial derivatives at that point. - ``hessian`` -- Optional hessian function. This will be computed automatically for symbolic functions. For Python functions, it allows the use of algorithms requiring derivatives. It should accept a tuple of arguments and return a NumPy array containing the second partial derivatives of the function. - ``algorithm`` -- String specifying algorithm to use. Options are ``'default'`` (for Python functions, the simplex method is the default) (for symbolic functions bfgs is the default): - ``'simplex'`` -- using the downhill simplex algorithm - ``'powell'`` -- use the modified Powell algorithm - ``'bfgs'`` -- (Broyden-Fletcher-Goldfarb-Shanno) requires gradient - ``'cg'`` -- (conjugate-gradient) requires gradient - ``'ncg'`` -- (newton-conjugate gradient) requires gradient and hessian - ``verbose`` -- (optional, default: False) print convergence message .. NOTE:: For additional information on the algorithms implemented in this function, consult SciPy's `documentation on optimization and root finding <https://docs.scipy.org/doc/scipy/reference/optimize.html>`_ EXAMPLES: Minimize a fourth order polynomial in three variables (see the :wikipedia:`Rosenbrock_function`):: sage: vars = var('x y z') sage: f = 100*(y-x^2)^2+(1-x)^2+100*(z-y^2)^2+(1-y)^2 sage: minimize(f, [.1,.3,.4]) # abs tol 1e-6 (1.0, 1.0, 1.0) Try the newton-conjugate gradient method; the gradient and hessian are computed automatically:: sage: minimize(f, [.1, .3, .4], algorithm="ncg") # abs tol 1e-6 (1.0, 1.0, 1.0) We get additional convergence information with the `verbose` option:: sage: minimize(f, [.1, .3, .4], algorithm="ncg", verbose=True) Optimization terminated successfully. ... (0.9999999..., 0.999999..., 0.999999...) 
Same example with just Python functions:: sage: def rosen(x): # The Rosenbrock function ....: return sum(100.0r*(x[1r:]-x[:-1r]**2.0r)**2.0r + (1r-x[:-1r])**2.0r) sage: minimize(rosen, [.1,.3,.4]) # abs tol 3e-5 (1.0, 1.0, 1.0) Same example with a pure Python function and a Python function to compute the gradient:: sage: def rosen(x): # The Rosenbrock function ....: return sum(100.0r*(x[1r:]-x[:-1r]**2.0r)**2.0r + (1r-x[:-1r])**2.0r) sage: import numpy sage: from numpy import zeros sage: def rosen_der(x): ....: xm = x[1r:-1r] ....: xm_m1 = x[:-2r] ....: xm_p1 = x[2r:] ....: der = zeros(x.shape, dtype=float) ....: der[1r:-1r] = 200r*(xm-xm_m1**2r) - 400r*(xm_p1 - xm**2r)*xm - 2r*(1r-xm) ....: der[0] = -400r*x[0r]*(x[1r]-x[0r]**2r) - 2r*(1r-x[0]) ....: der[-1] = 200r*(x[-1r]-x[-2r]**2r) ....: return der sage: minimize(rosen, [.1,.3,.4], gradient=rosen_der, algorithm="bfgs") # abs tol 1e-6 (1.0, 1.0, 1.0) """ from sage.structure.element import Expression from sage.ext.fast_callable import fast_callable import numpy from scipy import optimize if isinstance(func, Expression): var_list = func.variables() var_names = [str(_) for _ in var_list] fast_f = fast_callable(func, vars=var_names, domain=float) f = lambda p: fast_f(*p) gradient_list = func.gradient() fast_gradient_functions = [ fast_callable(gradient_list[i], vars=var_names, domain=float) for i in range(len(gradient_list)) ] gradient = lambda p: numpy.array( [a(*p) for a in fast_gradient_functions]) else: f = func if algorithm == "default": if gradient is None: min = optimize.fmin(f, [float(_) for _ in x0], disp=verbose, **args) else: min = optimize.fmin_bfgs(f, [float(_) for _ in x0], fprime=gradient, disp=verbose, **args) else: if algorithm == "simplex": min = optimize.fmin(f, [float(_) for _ in x0], disp=verbose, **args) elif algorithm == "bfgs": min = optimize.fmin_bfgs(f, [float(_) for _ in x0], fprime=gradient, disp=verbose, **args) elif algorithm == "cg": min = optimize.fmin_cg(f, [float(_) for _ in x0], fprime=gradient, disp=verbose, **args) elif algorithm == "powell": min = optimize.fmin_powell(f, [float(_) for _ in x0], disp=verbose, **args) elif algorithm == "ncg": if isinstance(func, Expression): hess = func.hessian() hess_fast = [[ fast_callable(a, vars=var_names, domain=float) for a in row ] for row in hess] hessian = lambda p: [[a(*p) for a in row] for row in hess_fast] from scipy import dot hessian_p = lambda p, v: dot(numpy.array(hessian(p)), v) min = optimize.fmin_ncg(f, [float(_) for _ in x0], fprime=gradient, \ fhess=hessian, fhess_p=hessian_p, disp=verbose, **args) return vector(RDF, min)
#val = N*0.5*(np.trace(np.dot(inv,delta)) + np.trace(np.dot(np.dot(np.dot(-inv,delta),inv),S))) val = np.reshape(val, (D * L, )) #print(val) return val #main D = 10 L = 2 N = 100 x, Y_pure = generateData() #print(Y_pure.shape) sig = [0.1, 0.4, 1] W0 = np.reshape(random.rand(20), (D, L)) print(W0) for i in range(len(sig)): sigma = sig[i] Y = Y_pure #Y = Y_pure + random.multivariate_normal(np.zeros((D)), sigma*np.eye(D), N) Wstar = opt.fmin_cg(f, W0, fprime=dfx) #print(Wstar) Wstar = np.reshape(Wstar, (D, L)) square = np.dot(np.transpose(Wstar), Wstar) t_recover = np.dot(np.dot(Y, Wstar), np.linalg.inv(square)) #print(t_recover.shape) plt.figure() dot = plt.plot(t_recover[:, 0], t_recover[:, 1], 'r.') plt.title('the retrieve latent X with sigma: ' + str(sigma)) plt.legend([dot], ["Recovered X"]) plt.show()
else: ergs,frcs= gather_smd_data(maindir) if fmethod in ('test','TEST') and potential in ('NN'): NN.init(maindir,params,sample_dirs,samples,nprcs,fmatch \ ,ergrefs,frcrefs,fmethod,parfile,runmode \ ,rcut,pranges,vranges) output_energy_relation(ergs,ergrefs,samples,sample_dirs,fname='out.erg.pmd-vs-dft.ini') output_force_relation(frcs,frcrefs,samples,sample_dirs,fname='out.frc.pmd-vs-dft.ini') if fmethod in ('cg','CG','conjugate-gradient'): print '>>>>> conjugate-gradient was selected.' if gradient in ('numerical'): solution= opt.fmin_cg(func,vars,args=(maindir,) ,maxiter=niter,disp=True ,epsilon=eps,gtol=gtol) else: if potential in ('linreg'): solution= opt.fmin_cg(func,vars,args=(maindir,) ,fprime=grad_linreg ,maxiter=niter,disp=True ,gtol=gtol) elif potential in ('NN'): solution= opt.fmin_cg(func,vars \ ,args=(maindir,) \ ,fprime=NN.grad \ ,maxiter=niter,disp=True \ ,gtol=gtol) print ' CG solution:',solution
def fprime(xk, f, epsilon, *args): f0 = f(*((xk, ) + args)) grad = np.zeros((xk.shape), float) ei = np.zeros((xk.shape), float) for i in range(xk.shape[0]): ei[i, -1] = 1.0 d = np.multiply(epsilon, ei) grad[i, -1] = (f(*((xk + d, ) + args)) - f0) / d[i, -1] ei[i, -1] = 0.0 return grad if __name__ == "__main__": fname = "../datas/DLBCL-Stanford/DLBCL-Stanford.mat" fname = "../datas/ColonTumor/colonTumor.mat" print fname X, Y = read_data(fname) X_index = np.array((1, 2, 4, 5)) X_model = X[:, X_index] d, c = X_model.shape[1], Y.shape[1] W = np.ones((d, c)) from scipy import optimize as opt threshold = 0.01 W = opt.fmin_cg(F, W, args=(X_model, Y, threshold)).reshape((d, c)) W = np.matrix(W) print "W = %s" % str(W) eps = np.sqrt(np.finfo(float).eps) eps_mat = eps * W w_prime = fprime(W, F, eps_mat, X_model, Y, threshold) print "w_prime = %s" % str(w_prime)
def iresolve(images, tf_matrices, scale=1.3, initial_guess=initial_guess_avg, initial_guess_args={}, camera=None, camera_args={}, cost_measure=None, cost_args={}): """Super-resolve a set of low-resolution images. Parameters ---------- images : list of ndarrays Low-resolution input frames. tf_matrices : list of (3, 3) ndarrays List of transformation matrices to transform each low-resolution frame to a reference image (typically, ``images[0]``). scale : float Resolution improvement required. initial_guess : callable, f(imgs, Hs, scale, oshape, **initial_guess_args) Function that calculates an initial estimate of the high-resolution image for initialising the iterative process. If not specified, ``initial_guess_avg`` is used. See ``initial_guess_avg`` for more information. initial_guess_args : dict, optional Optional keyword arguments for `initial_guess`. camera : callable, f(nr, img, H, scale, oshape, **camera_args), optional Function that emulates the effect of the camera on a high-resolution frame. See the docstring of ``default_camera`` for more detail. If not specified, ``default_camera`` is used. camera_args : dict, optional Optional keyword arguments for `camera`. cost_measure : callable, f(nr, x, y, **cost_args) Function that calculates the difference between two low-resolution frames. If not specified, ``cost_squared_error`` is used. cost_args : dict, optional Optional keyword arguments for `cost_measure`. Returns ------- out : ndarray Super-resolved image. """ if camera is None: camera = default_camera if cost_measure is None: cost_measure = cost_squared_error oshape = [int(i) for i in np.array(images[0].shape) * float(scale)] HR = initial_guess(images, tf_matrices, scale=scale, oshape=oshape, **initial_guess_args) HR_guess = HR.copy() def sr_func(HR, it=[0]): if it[0] % 100 == 0: log.info('Saving output for function call %d' % it[0]) np.save('HR', HR.reshape(oshape)) it[0] += 1 err = 0 save_shape = HR.shape HR.shape = oshape for i, (H, LR) in enumerate(zip(tf_matrices, images)): LR_est = camera(i, HR, H, scale, images[0].shape, **camera_args) err += cost_measure(i, LR, LR_est, HR, HR_guess, **cost_args) HR.shape = save_shape return err def callback(x, it=[1]): it[0] += 1 log.info('Iteration #%d' % it[0]) tic = time.time() log.info('Starting optimisation. This may take a long time (hour).') HR = opt.fmin_cg(sr_func, HR, callback=callback, maxiter=5) toc = time.time() log.info('Operation took %.2f seconds' % (toc - tic)) return HR.reshape(oshape)
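# Hedged usage sketch for iresolve above (not part of the original module):
# two random frames related by the identity homography, just to show the
# calling convention described in the docstring.
import numpy as np

frames = [np.random.rand(16, 16), np.random.rand(16, 16)]
homographies = [np.eye(3), np.eye(3)]
hr_estimate = iresolve(frames, homographies, scale=1.5)
print(hr_estimate.shape)  # roughly (24, 24)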
x_t = data[idx:idx + N_td]
y_t = num2cls(label[idx:idx + N_td])

# Cross-entropy error before training
J_classification(w, *(x_t, y_t))

# Stochastic gradient descent
# eta=0.001
# for k in range(100):
#     w = w - eta * gradient(w,*(x_t,y_t))
#     print(k)

# Conjugate gradient method
w = optimize.fmin_cg(J_classification, w, fprime=gradient, args=(x_t, y_t), gtol=1)

# Cross-entropy error after training
J_classification(w, *(x_t, y_t))

# Check
print("TEST")
w_l1 = w[:(N_in + 1) * N_hdn].reshape(N_in + 1, N_hdn)
w_l2 = w[(N_in + 1) * N_hdn:].reshape(N_hdn + 1, N_out)
okn = 0
testN = 5000
for i in range(testN):
    # idx=np.random.randint(60000)
    idx = i
def yaw_solve(yaw_image, yaw, scale, tol=1e-10, iter_lim=None, damp=1e-1, method='CG', norm=2): """Super-resolve a nonzero yaw image by solving a large, sparse set of linear equations. This method approximates the camera with a downsampling operator, using polygon interpolation. The LSQR method is used to solve the equation :math:`A\mathbf{x} = b` where :math:`A` is the downsampling operator, :math:`\mathbf{x}` is the high-resolution estimate (flattened in raster scan/ lexicographic order), and :math:`\mathbf{b}` is a vector of all the yaw_image pixels. Parameters ---------- yaw_image : ndarray Nonzero yaw input frame. tf_matrix : (3, 3) ndarray Transformation matrix that relates all yaw_image pixels to a reference high-resolution frame. scale : float The resolution of the output image is `scale` times the resolution of the input images. damp : float, optional If an initial guess is provided, `damp` specifies how much that estimate is weighed in the entire process. A larger value of `damp` results in a solution closer to `x0`, whereas a smaller version of `damp` yields a solution closer to the solution obtained without any initial estimate. method : {'CG', 'LSQR', 'descent', 'L-BFGS-B'} Whether to use conjugate gradients, least-squares, gradient descent or L-BFGS-B to determine the solution. norm : {1, 2} Whether to use the L1 or L2 norm to measure errors between images. Returns ------- HR : ndarray High-resolution estimate. """ ishape = yaw_image.shape oshape = yaw_image.shape print "Constructing camera operator..." op = poly_interp_op_yaw(oshape[0], oshape[1], ishape[0], ishape[1], yaw, scale, search_win=round(scale) * 2 + 1) #dop = op.todense() #dsDebug = gdal.GetDriverByName("GTIFF").Create('/tmp/debug.tif', opd.shape[1], opd.shape[0], 1, gdal.GDT_Float32) #dsDebug.GetRasterBand(1).WriteArray(opd) #dsDebug.FlushCache() M = np.prod(ishape) b = yaw_image.flat atol = btol = conlim = tol show = True # Construct the prior opT = op.T opT_sum1 = opT.sum( axis=1).flatten() + 0.00001 # add small bias to avoid division by zero opTb = opT.dot(b) x0 = opTb / opT_sum1 #return x0.reshape(oshape) # Error and gradient functions, used in conjugate gradient optimisation def sr_func(x, norm=norm): return (np.linalg.norm(op * x - b, norm) ** 2 + \ damp * np.linalg.norm(x - x0.flat, norm) ** 2) def sr_gradient(x, norm=norm): # Careful! Mixture of sparse and dense operators. #Axb = op * x - b #nrm_sq = np.dot(Axb, Axb) # Dense #Axbop = (op.T * Axb).T # Sparse #return nrm_sq * Axbop Axb = op * x - b L = len(x) if norm == 1: xmx0 = x - x0.flat term1 = np.linalg.norm(Axb, 1) * np.sign(Axb.T) * op term2 = damp * np.linalg.norm(xmx0, 1) * np.sign(xmx0.flat) elif norm == 2: term1 = (Axb.T * op) term2 = damp * (x - x0.flat) else: raise ValueError('Invalid norm for error measure (%s).' % norm) return 2 * (term1 + term2) print "Super resolving..." 
## Conjugate Gradient Optimisation if method == 'CG': x, fopt, f_calls, gcalls, warnflag = \ opt.fmin_cg(sr_func, x0, fprime=sr_gradient, gtol=0, disp=True, maxiter=iter_lim, full_output=True) elif method == 'LSQR': ## LSQR Optimisation ## x0 = x0.flat b = b - op * x0 x, istop, itn, r1norm, r2norm, anorm, acond, arnorm, xnorm, var = \ lsqr(op, b, atol=atol, btol=btol, conlim=conlim, damp=damp, show=show, iter_lim=iter_lim) x = x0 + x elif method == 'descent': ## Steepest Descent Optimisation ## x = np.array(x0, copy=True).reshape(np.prod(x0.shape)) for i in range(50): print(op.T * ((op * x) - b)).shape print "Gradient descent step %d" % i x += damp * -1 * (op.T * ((op * x) - b)) # Could add prior: + lam * (x - x0.flat)) ## L-BFGS-B elif method == 'L-BFGS-B': x, f, d = opt.fmin_l_bfgs_b(sr_func, x0.flat, fprime=sr_gradient) print "L-BFGS-B converged after %d function calls." % d['funcalls'] print "Final function value:", f print "Reason for termination:", d['task'] elif method == 'direct': x = sparse.linalg.spsolve(op, b) else: raise ValueError('Invalid method (%s) specified.' % method) return x.reshape(oshape), x0.reshape(oshape)
def solve(images, tf_matrices, scale, x0=None, tol=1e-10, iter_lim=None, damp=1e-1, method='CG', operator='bilinear', norm=1, standard_form=False): """Super-resolve a set of low-resolution images by solving a large, sparse set of linear equations. This method approximates the camera with a downsampling operator, using bilinear or polygon interpolation. The LSQR method is used to solve the equation :math:`A\mathbf{x} = b` where :math:`A` is the downsampling operator, :math:`\mathbf{x}` is the high-resolution estimate (flattened in raster scan/ lexicographic order), and :math:`\mathbf{b}` is a stacked vector of all the low-resolution images. Parameters ---------- images : list of ndarrays Low-resolution input frames. tf_matrices : list of (3, 3) ndarrays Transformation matrices that relate all low-resolution frames to a reference low-resolution frame (usually ``images[0]``). scale : float The resolution of the output image is `scale` times the resolution of the input images. x0 : ndarray, optional Initial guess of HR image. damp : float, optional If an initial guess is provided, `damp` specifies how much that estimate is weighed in the entire process. A larger value of `damp` results in a solution closer to `x0`, whereas a smaller version of `damp` yields a solution closer to the solution obtained without any initial estimate. method : {'CG', 'LSQR', 'descent', 'L-BFGS-B'} Whether to use conjugate gradients, least-squares, gradient descent or L-BFGS-B to determine the solution. operator : {'bilinear', 'polygon'} The camera model is approximated as an interpolation process. The bilinear interpolation operator only works well for zoom ratios < 2. norm : {1, 2} Whether to use the L1 or L2 norm to measure errors between images. standard_form : bool Whether to convert the matrix operator to standard form before processing. Returns ------- HR : ndarray High-resolution estimate. """ assert len(images) == len(tf_matrices) HH = [H.copy() for H in tf_matrices] HH_scaled = [] scale = float(scale) for H in HH: HS = np.array([[scale, 0, 0], [0, scale, 0], [0, 0, 1]]) HH_scaled.append(np.linalg.inv(np.dot(HS, H))) HH = HH_scaled oshape = np.floor(np.array(images[0].shape) * scale) LR_shape = images[0].shape print "Constructing camera operator (%s)..." % operator if operator == 'bilinear': op = bilinear(oshape[0], oshape[1], HH, *LR_shape, boundary=0) elif operator == 'polygon': sub_ops = [] for H in HH: sub_ops.append( poly_interp_op(oshape[0], oshape[1], H, *LR_shape, search_win=round(scale) * 2 + 1)) op = sparse.vstack(sub_ops, format='csr') else: raise ValueError('Invalid operator requested (%s).' % operator) ## Visualise mapping of frames ## ## import matplotlib.pyplot as plt ## P = np.prod(LR_shape) ## img = (op * x0.flat).reshape(LR_shape) ## plt.subplot(1, 4, 1) ## plt.imshow(x0, cmap=plt.cm.gray) ## plt.title('x0') ## plt.subplot(1, 4, 2) ## plt.imshow(images[0], cmap=plt.cm.gray) ## plt.title('LR frame') ## plt.subplot(1, 4, 3) ## plt.imshow(img, cmap=plt.cm.gray) ## plt.title('LR image Ax0') ## plt.subplot(1, 4, 4) ## plt.imshow(images[0] - img, cmap=plt.cm.gray) ## plt.title('diff images[0] - Ax') ## plt.show() if standard_form: print "Bringing matrix to standard form..." 
P = ordering.standard_form(op) op = P * op k = len(images) M = np.prod(LR_shape) b = np.empty(k * M) for i in range(k): b[i * M:(i + 1) * M] = images[i].flat if standard_form: b = P * b atol = btol = conlim = tol show = True # Construct the prior opT = op.T opT_sum1 = opT.sum( axis=1).flatten() + 0.00001 # add small bias to avoid division by zero opTb = opT.dot(b) x0 = opTb / opT_sum1 #return x0.reshape(oshape) # Error and gradient functions, used in conjugate gradient optimisation def sr_func(x, norm=norm): return (np.linalg.norm(op * x - b, norm) ** 2 + \ damp * np.linalg.norm(x - x0.flat, norm) ** 2) def sr_gradient(x, norm=norm): # Careful! Mixture of sparse and dense operators. #Axb = op * x - b #nrm_sq = np.dot(Axb, Axb) # Dense #Axbop = (op.T * Axb).T # Sparse #return nrm_sq * Axbop Axb = op * x - b L = len(x) if norm == 1: xmx0 = x - x0.flat term1 = np.linalg.norm(Axb, 1) * np.sign(Axb.T) * op term2 = damp * np.linalg.norm(xmx0, 1) * np.sign(xmx0.flat) elif norm == 2: term1 = (Axb.T * op) term2 = damp * (x - x0.flat) else: raise ValueError('Invalid norm for error measure (%s).' % norm) return 2 * (term1 + term2) print "Super resolving..." ## Conjugate Gradient Optimisation if method == 'CG': x, fopt, f_calls, gcalls, warnflag = \ opt.fmin_cg(sr_func, x0, fprime=sr_gradient, gtol=0, disp=True, maxiter=iter_lim, full_output=True) elif method == 'LSQR': ## LSQR Optimisation ## x0 = x0.flat b = b - op * x0 x, istop, itn, r1norm, r2norm, anorm, acond, arnorm, xnorm, var = \ lsqr(op, b, atol=atol, btol=btol, conlim=conlim, damp=damp, show=show, iter_lim=iter_lim) x = x0 + x elif method == 'descent': ## Steepest Descent Optimisation ## x = np.array(x0, copy=True).reshape(np.prod(x0.shape)) for i in range(50): print(op.T * ((op * x) - b)).shape print "Gradient descent step %d" % i x += damp * -1 * (op.T * ((op * x) - b)) # Could add prior: + lam * (x - x0.flat)) ## L-BFGS-B elif method == 'L-BFGS-B': x, f, d = opt.fmin_l_bfgs_b(sr_func, x0.flat, fprime=sr_gradient) print "L-BFGS-B converged after %d function calls." % d['funcalls'] print "Final function value:", f print "Reason for termination:", d['task'] elif method == 'direct': x = sparse.linalg.spsolve(op, b) else: raise ValueError('Invalid method (%s) specified.' % method) return x.reshape(oshape)
print("Training error | Cross validation error | Accuracy on test set") for i in range(25): iterative(la) la = la * 2 elif task == 2: # Learning curves print("\n=================================================") print("Learning curves\n") print("lambda=", la) print("# of training samples | Training error | Cross validation error") for i in range(Xtrain.shape[0] // 50): Xtrain = Xt[0:i * 50 + 1, :] ytrain = yt[0:i * 50 + 1, :] answer = sc.fmin_cg(calculateJ, rndInit, calculateGrad, maxiter=90, disp=False) J1 = J(answer, 900, 25, 10, Xtrain, ytrain, 0) J2 = J(answer, 900, 25, 10, Xcv, ycv, 0) print(i * 50, J1, J2) elif task == 3: # Gradient checnking print("\n=================================================") print("Gradient checking\n") print( "Calcualting theta values with backpropagation (will takes some time)") num_input = 400 answer = sc.fmin_cg(calculateJ, rndInit, calculateGrad,
def scipy_cg(project,x0=None,xb=None,its=100,accu=1e-10,grads=True):
    """ result = scipy_cg(project,x0=None,xb=None,its=100,accu=1e-10)

        Runs the Scipy implementation of CG with
        an SU2 project

        Inputs:
            project - an SU2 project
            x0      - optional, initial guess
            xb      - optional, design variable bounds
            its     - max outer iterations, default 100
            accu    - accuracy, default 1e-10

        Outputs:
           result - the outputs from scipy.optimize.fmin_cg
    """

    # import scipy optimizer
    from scipy.optimize import fmin_cg

    # handle input cases
    if x0 is None: x0 = []
    if xb is None: xb = []

    # function handles
    func           = obj_f

    # gradient handles
    if project.config.get('GRADIENT_METHOD','NONE') == 'NONE':
        fprime         = None
    else:
        fprime         = obj_df

    # number of design variables
    n_dv = len( project.config['DEFINITION_DV']['KIND'] )
    project.n_dv = n_dv

    # Initial guess
    if not x0: x0 = [0.0]*n_dv

    # prescale x0
    dv_scales = project.config['DEFINITION_DV']['SCALE']
    x0 = [ x0[i]/dv_scl for i,dv_scl in enumerate(dv_scales) ]

    # scale the requested accuracy by the objective scaling factor
    obj = project.config['OPT_OBJECTIVE']
    obj_scale = obj[obj.keys()[0]]['SCALE']
    accu = accu*obj_scale

    # finite-difference step (used only when no analytic gradient is given)
    eps = 1.0e-04

    # optimizer summary
    sys.stdout.write('Conjugate gradient (CG) parameters:\n')
    sys.stdout.write('Number of design variables: ' + str(n_dv) + '\n')
    sys.stdout.write('Objective function scaling factor: ' + str(obj_scale) + '\n')
    sys.stdout.write('Maximum number of iterations: ' + str(its) + '\n')
    sys.stdout.write('Requested accuracy: ' + str(accu) + '\n')
    sys.stdout.write('Initial guess for the independent variable(s): ' + str(x0) + '\n')
    sys.stdout.write('Lower and upper bound for each independent variable: ' + str(xb) + '\n\n')

    # Evaluate the objective function (only 1st iteration)
    obj_f(x0,project)

    # Run Optimizer
    outputs = fmin_cg( x0          = x0          ,
                       f           = func        ,
                       fprime      = fprime      ,
                       args        = (project,)  ,
                       gtol        = accu        ,
                       epsilon     = eps         ,
                       maxiter     = its         ,
                       full_output = True        ,
                       disp        = True        ,
                       retall      = True         )

    # Done
    return outputs
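# The wrapper above returns the raw tuple produced by fmin_cg when
# full_output=True and retall=True.  A minimal sketch of unpacking that tuple
# on a toy objective (the SU2 `project` object and the obj_f/obj_df handles
# are not available here; func/fprime below are invented for the example):
import numpy as np
from scipy.optimize import fmin_cg

def func(x, offset):
    # simple shifted quadratic
    return np.sum((x - offset) ** 2)

def fprime(x, offset):
    return 2.0 * (x - offset)

outputs = fmin_cg(f=func, x0=[0.0, 0.0], fprime=fprime, args=(1.5,),
                  gtol=1e-10, maxiter=100,
                  full_output=True, disp=False, retall=True)

# with full_output and retall the result is a 6-tuple
xopt, fopt, func_calls, grad_calls, warnflag, allvecs = outputs
print(xopt)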
X = np.random.randn(num_movies, num_features)
theta = np.random.randn(num_users, num_features)

initial_params = np.concatenate([X.flatten(), theta.flatten()])

lmd = 10


def cost_func(p):
    return ccf.cofi_cost_function(p, Ynorm, R, num_users, num_movies, num_features, lmd)[0]


def grad_func(p):
    return ccf.cofi_cost_function(p, Ynorm, R, num_users, num_movies, num_features, lmd)[1]


theta, *unused = opt.fmin_cg(cost_func, fprime=grad_func, x0=initial_params, maxiter=100, disp=False, full_output=True)

# Unfold the returned parameter vector back into X (movie features) and
# theta (user parameters)
X = theta[0:num_movies * num_features].reshape((num_movies, num_features))
theta = theta[num_movies * num_features:].reshape((num_users, num_features))

print('Recommender system learning completed')
print(theta)

input('Program paused. Press ENTER to continue')

# ===================== Part 8: Recommendation for you =====================
# After training the model, you can now make recommendations by computing
# the predictions matrix.
#

p = np.dot(X, theta.T)
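# Self-contained sketch of the collaborative-filtering objective that
# ccf.cofi_cost_function evaluates above (that module is not available here):
# J = 0.5 * sum(((X Theta^T - Y) * R)^2) plus L2 regularisation, with the
# gradient returned on the same unrolled parameter vector fmin_cg works on.
# All data below is random and for illustration only.
import numpy as np
from scipy import optimize as opt

rng = np.random.RandomState(0)
n_movies, n_users, n_feat, lam = 20, 8, 3, 10.0
Y = rng.rand(n_movies, n_users)
R = (rng.rand(n_movies, n_users) > 0.5).astype(float)

def cofi_cost(p):
    X = p[:n_movies * n_feat].reshape(n_movies, n_feat)
    Theta = p[n_movies * n_feat:].reshape(n_users, n_feat)
    err = (X.dot(Theta.T) - Y) * R          # only rated entries contribute
    J = 0.5 * np.sum(err ** 2) + 0.5 * lam * (np.sum(X ** 2) + np.sum(Theta ** 2))
    grad_X = err.dot(Theta) + lam * X
    grad_Theta = err.T.dot(X) + lam * Theta
    return J, np.concatenate([grad_X.ravel(), grad_Theta.ravel()])

p0 = rng.randn(n_movies * n_feat + n_users * n_feat)
p_opt = opt.fmin_cg(lambda p: cofi_cost(p)[0], p0,
                    fprime=lambda p: cofi_cost(p)[1],
                    maxiter=50, disp=False)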
def max_sase_bump(self, bump, alpha, method='simplex', params={}, opt_pointing=False): ''' direct sase optimization with simplex, using correctors as a multiknob ''' if self.debug: print('starting multiknob optimization, correctors = ', bump) if opt_pointing: weight_gmd_bpm_1 = 10.0 weight_gmd_bpm_2 = 10.0 else: weight_gmd_bpm_1 = 0.0 weight_gmd_bpm_2 = 0.0 def error_func(x): print(self.dp) pen_max = 100.0 if abs(x) > 1: return pen_max dI = bump["dI"] currents = bump["currents"] #print 'error_func: ', bpm_names, '->', planes correctors_ = bump["correctors"] for i in range(len(correctors_)): print("alpha = ", x) print('{0} x[{1}]={2}'.format(correctors_[i], i, currents[i] + dI[i] * x)) limits = self.dp.get_limits(correctors_[i]) print('limits=[{0}, {1}]'.format(limits[0], limits[1])) if currents[i] + dI[i] * x < limits[ 0] or currents[i] + dI[i] * x > limits[1]: print('limits exceeded') return pen_max for i in range(len(correctors_)): print('setting', correctors_[i], '->', currents[i] + dI[i] * x) self.mi.set_value(correctors_[i], currents[i] + dI[i] * x) sleep(self.timeout) sase = self.mi.get_sase(detector=self.detector) alarm = np.max(self.mi.get_alarms()) #z1, z2 = get_sase_pos() if self.debug: print('alarm:', alarm) if self.debug: print('sase:', sase) #print 'pointing', z1, z2, 'weights', weight_gmd_bpm_1, weight_gmd_bpm_2 pen = 0.0 if alarm > 1.0: return pen_max if alarm > 0.7: return alarm * 50.0 pen += alarm pen -= sase if self.debug: print('penalty:', pen) return pen sase_ref = self.mi.get_sase(detector=self.detector) x = alpha x_init = x if self.logging: f = open(self.log_file, 'a') f.write('\n*** optimization step ***\n') f.write(str(bump["correctors"]) + '\n') f.write(method + '\n') f.write('x0=' + str(x_init) + '\n') f.write('sase0=' + str(sase_ref) + '\n') if method == 'cg': print('using CG optimizer, params:', params) try: max_iter = params['maxiter'] except KeyError: max_iter = 10 * len(x) try: epsilon = params['epsilon'] except KeyError: epsilon = 0.1 try: gtol = params['gtol'] except KeyError: gtol = 1.e-3 opt.fmin_cg(error_func, x, gtol=gtol, epsilon=epsilon, maxiter=max_iter) if method == 'simplex': print('using simplex optimizer, params:', params) try: max_iter = params['maxiter'] except KeyError: max_iter = 10 * len(bump["correctors"]) try: xtol = params['xtol'] except KeyError: xtol = 1.e-3 opt.fmin(error_func, x, xtol=xtol, maxiter=max_iter) if method == 'powell': print('using powell optimizer, params:', params) try: max_iter = params['maxiter'] except KeyError: max_iter = 10 * len(x) try: xtol = params['xtol'] except KeyError: xtol = 1.e-3 opt.fmin_powell(error_func, x, xtol=xtol, maxiter=max_iter) if method == 'fancy_stuff_from': print('using fancy optimizer, params:', params) pass sase_new = self.mi.get_sase(detector=self.detector) print('step ended changing sase from/to', sase_ref, sase_new) if sase_new <= sase_ref: for i in range(len(bump["correctors"])): print('reverting', bump["correctors"][i], '->', bump["currents"][i]) self.mi.set_value(bump["correctors"][i], bump["currents"][i]) if self.logging: f.write('sase_new=' + str(sase_new) + '\n') f.close()
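# Sketch of the penalty-style objective used by error_func above: knob values
# outside their limits are rejected with a large penalty, otherwise the
# negative of the measured signal is returned so that minimisation maximises
# SASE.  The machine interface (self.mi / self.dp) is not reproduced here;
# toy_sase and the limits below are invented stand-ins.
import numpy as np
from scipy import optimize as opt

limits = (-2.0, 2.0)
pen_max = 100.0

def toy_sase(x):
    # stand-in for mi.get_sase(): a signal peaked around x = 0.7
    return np.exp(-(x - 0.7) ** 2)

def error_func_demo(x):
    x = x[0]
    if x < limits[0] or x > limits[1]:
        return pen_max
    return -toy_sase(x)

x_opt = opt.fmin(error_func_demo, [0.0], xtol=1e-3, maxiter=50, disp=False)
print(x_opt)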
    Xmat = np.ones(m)
    for x in x_data:
        Xmat = np.column_stack((Xmat, x))
    return Xmat


# cost function
def Jcost(theta, X, y):
    m = len(y)
    h = expit(X @ theta)
    cost = -(y * np.log(h)) - ((1 - y) * np.log(1 - h))
    J = (1 / m) * np.sum(cost)
    return J


# gradient of the logistic cost (uses the sigmoid response, not the linear residual)
def Jcost_prime(theta, X, y):
    m = len(y)
    return (1 / m) * (X.T @ (expit(X @ theta) - y))


# conjugate gradient descent
drat, am = choose_data(mtdata, 'drat', 'am')  # extract data
drat_nor, = feat_scale(drat)                  # scaling
X = make_X(drat_nor)                          # construct the data matrix
m, k = X.shape
y = am                                        # construct the y vector
theta0 = np.array([0, 10])                    # initial guess of theta
res = op.fmin_cg(Jcost, theta0, fprime=Jcost_prime, args=(X, y))
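# Quick numerical check of the Jcost/Jcost_prime pair above with
# scipy.optimize.check_grad, on random stand-in data (the mtcars-style
# `mtdata` used above is not available here; X_chk and y_chk are invented).
import numpy as np
from scipy.optimize import check_grad

rng = np.random.RandomState(0)
X_chk = np.column_stack([np.ones(50), rng.randn(50)])
y_chk = (rng.rand(50) > 0.5).astype(float)

# should print a value close to zero if the analytic gradient matches
print(check_grad(Jcost, Jcost_prime, np.array([0.5, -0.5]), X_chk, y_chk))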
def max_sase(self, devices, method='simplex', params={}, opt_pointing=False): ''' direct sase optimization with simplex, using correctors as a multiknob ''' if self.debug: print('starting multiknob optimization, correctors = ', devices) def error_func(x, x_init, tols): if self.debug: print("X_relative = ", x, x_init, tols) x = x_init + (x - 1) * tols / (10. * 0.05) # relative to absolute if self.debug: print("X_absolute = ", x, x_init) if self.debug: print("isRunning:", self.isRunning) self.niter += 1 print("number of func eval: ", self.niter) if not self.isRunning: print("save machine parameters and kill optimizer") if self.sop != None: self.save_action([devices, method, params], flag="force stop") sleep( 1 ) # in order to give time to save parameters before next evaluation of error_func pen_max = 100.0 for i in range(len(x)): if self.debug: print('{0} x[{1}]={2}'.format(devices[i], i, x[i])) limits = self.dp.get_limits(devices[i]) if self.debug: print('limits=[{0}, {1}]'.format(limits[0], limits[1])) if x[i] < limits[0] or x[i] > limits[1]: print('limits exceeded') return pen_max for i in range(len(devices)): print('setting', devices[i], '->', x[i]) self.mi.set_value(devices[i], x[i]) sleep(self.timeout) sase = self.mi.get_sase(detector=self.detector) alarm = np.max(self.mi.get_alarms()) if self.debug: print('alarm:', alarm) if self.debug: print('sase:', sase) pen = 0.0 if alarm > 1.0: return pen_max if alarm > 0.7: return alarm * 50.0 pen += alarm pen -= sase if self.debug: print('penalty:', pen) self.penalty = pen return pen sase_ref = self.mi.get_sase(detector=self.detector) x = np.array([self.mi.get_value(dev) for dev in devices]) tols = np.zeros(len(devices)) for i, dev in enumerate(devices): limits = self.dp.get_limits(dev) tols[i] = (limits[1] - limits[0]) / 2. x_init = x if self.logging: f = open(self.log_file, 'a') f.write('\n*** optimization step ***\n') f.write(str(devices) + '\n') f.write(method + '\n') f.write('x0=' + str(x_init) + '\n') f.write('sase0=' + str(sase_ref) + '\n') try: max_iter = params['maxiter'] except KeyError: max_iter = 10 * len(x) if max_iter == None: max_iter = 10 * len(x) try: xtol = params['xtol'] except KeyError: xtol = 1.e-3 self.maxiter = max_iter if method == 'simplex': print('using simplex optimizer, params:', params) # opt.fmin(error_func,x,xtol=xtol, maxiter=max_iter) opt.fmin(error_func, np.ones(len(x)), args=(x_init, tols), xtol=xtol, maxfun=max_iter) # opt.fmin(error_func, x, args=(x_init,tols), xtol=xtol, maxfun=max_iter) if method == 'cg': print('using CG optimizer, params:', params) try: epsilon = params['epsilon'] except KeyError: epsilon = 0.1 opt.fmin_cg(error_func, x / x_init, args=(x_init, ), gtol=xtol, epsilon=epsilon, maxiter=max_iter) if method == 'powell': print('using powell optimizer, params:', params) opt.fmin_powell(error_func, x / x_init, args=(x_init, ), xtol=xtol, maxiter=max_iter) if method == 'fancy_stuff_from': print('using fancy optimizer, params:', params) pass sase_new = self.mi.get_sase(detector=self.detector) #self.save_machine_set() #print ('step ended changing sase from/to', sase_ref, sase_new) #if sase_new <= sase_ref: # for i in range(len(devices)): # print ('reverting', devices[i], '->',x_init[i]) # self.mi.set_value(devices[i], x_init[i]) if self.logging: f.write('sase_new=' + str(sase_new) + '\n') f.close()
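# Sketch of the relative-parameterisation trick used by error_func above: the
# optimiser works on values near 1.0 and the objective maps them back to
# absolute device settings via x_init and per-device tolerances.  A toy
# quadratic stands in for the measured penalty; x_init, tols and target below
# are invented for the example.
import numpy as np
from scipy import optimize as opt

x_init = np.array([10.0, -5.0])
tols = np.array([4.0, 2.0])
target = np.array([10.5, -4.8])

def error_func_demo(x_rel, x_init, tols):
    x_abs = x_init + (x_rel - 1) * tols / (10. * 0.05)  # relative -> absolute
    return np.sum((x_abs - target) ** 2)

x_rel_opt = opt.fmin(error_func_demo, np.ones(2), args=(x_init, tols),
                     xtol=1e-3, maxfun=200, disp=False)
print(x_init + (x_rel_opt - 1) * tols / (10. * 0.05))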
# Test ForwardProp theta = generateNeuralNetwork(layerSizes=[400, 25, 10], eps=chooseEps(400, 10)) y_vect = vectorize_labels(y, numLabels=theta[1].shape[0]) lam = 1 sampleSize = y.shape[0] cF = nnCostFunction_reg(theta, x, y_vect, sampleSize, lam) print(cF) # Test Gradient Delta = backPropagate(theta, x, y_vect, sampleSize, lam) checkGradient(theta, Delta, x, y_vect, sampleSize, lam) # Run Optimizer theta_unrolled = unrollData(theta) x_unrolled = unrollDataX(x) opt = op.fmin_cg(f=nnCostFunction_reg_unrolled, x0=theta_unrolled, fprime=backPropagate_unrolled, args=(x_unrolled, y, sampleSize, lam), maxiter=50, disp=1, full_output=1) optTheta = rollData(opt[0]) pred = makePrediction(x, y, optTheta) displayHiddenUnits(optTheta, 5)
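# Sketch of the unroll/roll pattern relied on above: fmin_cg optimises a single
# flat vector, so the per-layer weight matrices are flattened before the call
# and reshaped back afterwards.  unrollData/rollData themselves are not
# reproduced here; the layer shapes below (for a 400-25-10 network with bias
# columns) are assumptions for the example.
import numpy as np

shapes = [(25, 401), (10, 26)]

def unroll(thetas):
    return np.concatenate([t.ravel() for t in thetas])

def roll(flat, shapes):
    thetas, start = [], 0
    for r, c in shapes:
        thetas.append(flat[start:start + r * c].reshape(r, c))
        start += r * c
    return thetas

thetas = [np.random.randn(*s) for s in shapes]
flat = unroll(thetas)
restored = roll(flat, shapes)
print(all(np.allclose(a, b) for a, b in zip(thetas, restored)))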
# network. To train your neural network, we will now use 'opt.fmin_cg'. # print('Training Neural Network ... ') lmd = 1 def cost_func(p): return ncf.nn_cost_function(p, input_layer_size, hidden_layer_size, num_labels, X, y, lmd)[0] def grad_func(p): return ncf.nn_cost_function(p, input_layer_size, hidden_layer_size, num_labels, X, y, lmd)[1] nn_params, *unused = opt.fmin_cg(cost_func, fprime=grad_func, x0=nn_params, maxiter=400, disp=True, full_output=True) # Obtain theta1 and theta2 back from nn_params theta1 = nn_params[:hidden_layer_size * (input_layer_size + 1)].reshape(hidden_layer_size, input_layer_size + 1) theta2 = nn_params[hidden_layer_size * (input_layer_size + 1):].reshape(num_labels, hidden_layer_size + 1) input('Program paused. Press ENTER to continue') # ===================== Part 10: Visualize Weights ===================== # You can now 'visualize' what the neural network is learning by # displaying the hidden units to see what features they are capturing in # the data print('Visualizing Neural Network...') dd.display_data(theta1[:, 1:])
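# cost_func and grad_func above each call nn_cost_function, so every fmin_cg
# step runs the forward/backward pass twice.  A small caching wrapper (a
# sketch, not part of the original script; nn_cost_function is replaced by a
# toy quadratic) avoids the duplicate work by remembering the last (J, grad)
# pair for a given parameter vector.
import numpy as np
from scipy import optimize as opt

def make_cached(cost_and_grad):
    cache = {'p': None, 'val': None}

    def _eval(p):
        if cache['p'] is None or not np.array_equal(p, cache['p']):
            cache['p'] = np.array(p, copy=True)
            cache['val'] = cost_and_grad(p)
        return cache['val']

    return (lambda p: _eval(p)[0]), (lambda p: _eval(p)[1])

def toy_cost_and_grad(p):
    # stand-in for nn_cost_function: returns (cost, gradient)
    return np.sum(p ** 2), 2 * p

f, fprime = make_cached(toy_cost_and_grad)
p_opt = opt.fmin_cg(f, np.ones(5), fprime=fprime, maxiter=50, disp=False)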