def fit_circle(rad_guess, x_guess, y_guess, pts, method, verbose=True):
    def error_function(params):
        center = np.matrix((params[0], params[1])).T
        rad = params[2]
        #print 'pts.shape', pts.shape
        #print 'center.shape', center.shape
        #print 'ut.norm(pts-center).shape', ut.norm(pts-center).shape
        err = ut.norm(pts - center).A1 - rad
        res = np.dot(err, err)
        return res

    params_1 = [x_guess, y_guess, rad_guess]
    if method == 'fmin':
        r = so.fmin(error_function, params_1, xtol=0.0002, ftol=0.000001,
                    full_output=1, disp=verbose)
        opt_params_1, fopt_1 = r[0], r[1]
    elif method == 'fmin_bfgs':
        r = so.fmin_bfgs(error_function, params_1, full_output=1, disp=verbose)
        opt_params_1, fopt_1 = r[0], r[1]
    else:
        raise RuntimeError('unknown method: ' + method)

    params_2 = [x_guess, y_guess + 2 * rad_guess, rad_guess]
    if method == 'fmin':
        r = so.fmin(error_function, params_2, xtol=0.0002, ftol=0.000001,
                    full_output=1, disp=verbose)
        opt_params_2, fopt_2 = r[0], r[1]
    elif method == 'fmin_bfgs':
        r = so.fmin_bfgs(error_function, params_2, full_output=1, disp=verbose)
        opt_params_2, fopt_2 = r[0], r[1]
    else:
        raise RuntimeError('unknown method: ' + method)

    if fopt_2 < fopt_1:
        return opt_params_2[2], opt_params_2[0], opt_params_2[1]
    else:
        return opt_params_1[2], opt_params_1[0], opt_params_1[1]
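# A minimal, self-contained sketch of the same least-squares circle fit, assuming only
# numpy and scipy.optimize (the `ut`/`so` helpers and np.matrix above are replaced by
# plain numpy calls; the names here are illustrative, not from the original code).
import numpy as np
import scipy.optimize as so

def fit_circle_simple(rad_guess, x_guess, y_guess, pts):
    """pts: (N, 2) array of points; returns (radius, cx, cy)."""
    def error_function(params):
        cx, cy, rad = params
        err = np.hypot(pts[:, 0] - cx, pts[:, 1] - cy) - rad
        return np.dot(err, err)
    opt = so.fmin_bfgs(error_function, [x_guess, y_guess, rad_guess], disp=False)
    return opt[2], opt[0], opt[1]

# Example: noisy points on a unit circle centred at (1, 2).
theta = np.linspace(0, 2 * np.pi, 50)
pts = np.c_[1 + np.cos(theta), 2 + np.sin(theta)] + 0.01 * np.random.randn(50, 2)
print(fit_circle_simple(0.8, 0.9, 1.9, pts))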
def main():
    # read in the student admission data
    fp = open('ex2data1.txt', 'r')
    students = []
    for line in fp:
        row = line.strip().split(',')
        students.append([float(row[0]), float(row[1]), int(row[2])])
    dfs = pd.DataFrame(students)
    dfs.columns = ['Exam1', 'Exam2', 'ad']

    # normalize raw data to prevent math overflow
    dfs['Exam1'] = (dfs['Exam1'] - np.mean(dfs['Exam1'])) / np.std(dfs['Exam1'])
    dfs['Exam2'] = (dfs['Exam2'] - np.mean(dfs['Exam2'])) / np.std(dfs['Exam2'])

    # create y array and x matrix
    ydata = np.array(dfs['ad'])
    xdata = np.asmatrix([np.array(dfs['Exam1']), np.array(dfs['Exam2'])])
    xdata = xdata.transpose()

    # use the built-in optimization function to calculate beta
    betaOpt = fmin_bfgs(functools.partial(logisticCost, ydata, xdata, False), [0, 0],
                        fprime=functools.partial(diff, ydata, xdata, False))
    betaOptInt = fmin_bfgs(functools.partial(logisticCost, ydata, xdata, True), [0, 0, 0],
                           fprime=functools.partial(diff, ydata, xdata, True))

    scatterPlot(dfs, betaOpt, False, "w/o intercept")
    scatterPlot(dfs, betaOptInt, True, "w/ intercept")
def fit_circle_priors(rad_guess, x_guess, y_guess, pts, sigma_r, sigma_xy,
                      sigma_pts, verbose=True):
    global x_prior, y_prior
    x_prior = x_guess
    y_prior = y_guess

    def error_function(params):
        center = np.matrix((params[0], params[1])).T
        rad = params[2]
        err_pts = ut.norm(pts - center).A1 - rad
        lik = np.dot(err_pts, err_pts) / (sigma_pts * sigma_pts)
        pri = ((rad - rad_guess) ** 2) / (sigma_r * sigma_r)
        #pri += ((x_prior - center[0,0])**2) / (sigma_xy * sigma_xy)
        #pri += ((y_prior - center[1,0])**2) / (sigma_xy * sigma_xy)
        return (lik + pri)

    params_1 = [x_prior, y_prior, rad_guess]
    r = so.fmin_bfgs(error_function, params_1, full_output=1, disp=verbose, gtol=1e-5)
    opt_params_1, fopt_1 = r[0], r[1]

    y_prior = y_guess + 2 * rad_guess
    params_2 = [x_prior, y_prior, rad_guess]
    r = so.fmin_bfgs(error_function, params_2, full_output=1, disp=verbose, gtol=1e-5)
    opt_params_2, fopt_2 = r[0], r[1]

    if fopt_2 < fopt_1:
        return opt_params_2[2], opt_params_2[0], opt_params_2[1]
    else:
        return opt_params_1[2], opt_params_1[0], opt_params_1[1]
def estimate(self,dat,method=None): ''' Estimates the parameters from the data in dat. It is possible to only selectively fit parameters of the distribution by setting the primary array accordingly (see :doc:`Tutorial on the Distributions module <tutorial_Distributions>`). :param dat: Data points on which the Gaussian distribution will be estimated. :type dat: natter.DataModule.Data :param method: method that is used to estimate the parameters (choices are 'analytic', and 'numeric') :type method: string ''' if method==None: method = "analytic" if method=="analytic": if 'sigma' in self.primary: self.param['sigma'] = dat.cov() self.cholP = cholesky(inv(self.param['sigma'])) if 'mu' in self.primary: self.param['mu'] = dat.mean() else: def f(arr): self.array2primary(arr) return -sum(self.loglik(dat)) def df(arr): self.array2primary(arr) return -sum(self.dldtheta(dat),axis=1) arr0 = self.primary2array() optimize.fmin_bfgs(f,arr0,df)
def vb_optimize(x0, set_values, lowerbound, gradient=None):
    # Function for computing the lower bound
    def func(x):
        # Set the value of the nodes
        set_values(x)
        # Compute lower bound (and gradient terms)
        return -lowerbound()
        #return f

    # Function for computing the gradient of the lower bound
    def funcprime(x):
        # Collect the gradients from the nodes
        set_values(x)
        # Compute lower bound (and gradient terms)
        #lowerbound()
        return -gradient()
        #return df

    # Optimize
    if gradient is not None:
        check_gradient(x0, func, funcprime, 1e-6)
        xopt = optimize.fmin_bfgs(func, x0, fprime=funcprime, maxiter=100)
        #xopt = optimize.fmin_ncg(func, x0, fprime=funcprime, maxiter=50)
    else:
        xopt = optimize.fmin_bfgs(func, x0, maxiter=100)
        #xopt = optimize.fmin_ncg(func, x0, maxiter=50)

    # Set optimal values to the nodes
    set_values(xopt)
def string_gp_regression_calibrate(X, Y, n_string, min_t, max_t, x_0, hyper_type = 'SE', ): from scipy.optimize import fmin_bfgs K = n_string # Number of strings # Create the array of input string gp indices (X might not be sorted) X_couples = [(X[i], i) for i in xrange(len(X))] from operator import itemgetter X_couples.sort(key=itemgetter(0)) X_sorted = [elt[0] for elt in X_couples] def log_marginal(x): noise_vars = x[:K]**2 # The first K terms are string noise variances thetas = [] for _ in xrange(K): thetas += [np.abs([x[K+2*_], x[K+1+2*_]])] # The next 2K are thetas thetas = np.array(thetas) drvs = x[-n_string:] # The last K are used to determine boundary times b_X_sorted = boundaries_from_drivers(drvs, min_t, max_t) if n_string > 1: X_sorted_string_ids = [] idx = 1 for x in X_sorted: while x > b_X_sorted[idx]: idx += 1 X_sorted_string_ids += [idx] else: X_sorted_string_ids = [1]*len(X_sorted) X_sorted_string_ids_couples = [(X_sorted_string_ids[i], X_couples[i][1]) for i in xrange(len(X_couples))] X_sorted_string_ids_couples.sort(key=itemgetter(1)) X_string_ids = np.array([elt[0] for elt in X_sorted_string_ids_couples])-1 #String indexed from 0 here cov = string_cov(X, X, thetas, b_X_sorted, hyper_type.lower()) + np.diag(noise_vars[X_string_ids]) try: svd_factor = SVDFactorise(cov) except: print thetas print b_X_sorted raise ValueError cov_i = svd_factor['inv'] cov_det = svd_factor['det'] res = np.log(cov_det)+np.dot(Y, np.dot(cov_i, Y)) return res # Attempt 1: warm-up/smart initialisation x_opt = fmin_bfgs(log_marginal, x_0, disp=False) # Attempt 2: max from smart initialisation x_opt = fmin_bfgs(log_marginal, np.abs(x_opt), disp=False) return np.abs(x_opt)
def fitBFGS(self, sampleSpec, initValues=None, vrot=None, priors=None):
    f = minuitFunction(self, sampleSpec, priors=priors)
    if vrot is not None:
        varnames = list(self.params) + ["vrot"]
        f.varnames(*varnames)
        return optimize.fmin_bfgs(f, initValues)
    else:
        f.varnames(*self.params)
        return optimize.fmin_bfgs(f, initValues)
def bfgs(x0, f, f_prime, hessian=None):
    all_x_i = [x0[0]]
    all_y_i = [x0[1]]
    all_f_i = [f(x0)]

    def store(X):
        x, y = X
        all_x_i.append(x)
        all_y_i.append(y)
        all_f_i.append(f(X))

    optimize.fmin_bfgs(f, x0, f_prime, callback=store, gtol=1e-12)
    return all_x_i, all_y_i, all_f_i
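# Usage sketch for the path-recording helper above, assuming `bfgs` and the
# `optimize` module it calls are in scope. scipy ships the Rosenbrock function
# and its gradient, so the call itself is self-contained.
import numpy as np
from scipy import optimize

xs, ys, fs = bfgs(np.array([-1.0, 1.5]), optimize.rosen, optimize.rosen_der)
print("iterates recorded:", len(xs), "final f:", fs[-1])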
def fit_circle(rad_guess, x_guess, y_guess, pts, method, verbose=True, rad_fix = False): def error_function(params): center = np.matrix((params[0],params[1])).T if rad_fix: rad = rad_guess else: rad = params[2] err = ut.norm(pts-center).A1 - rad res = np.dot(err,err) #if not rad_fix and rad < 0.3: # res = res*(0.3-rad)*100 return res params_1 = [x_guess,y_guess] if not rad_fix: params_1.append(rad_guess) if method == 'fmin': r = so.fmin(error_function,params_1,xtol=0.0002,ftol=0.000001,full_output=1,disp=verbose) opt_params_1,fopt_1 = r[0],r[1] elif method == 'fmin_bfgs': r = so.fmin_bfgs(error_function, params_1, full_output=1, disp = verbose, gtol=1e-5) opt_params_1,fopt_1 = r[0],r[1] else: raise RuntimeError('unknown method: '+method) params_2 = [x_guess,y_guess+2*rad_guess] if not rad_fix: params_2.append(rad_guess) if method == 'fmin': r = so.fmin(error_function,params_2,xtol=0.0002,ftol=0.000001,full_output=1,disp=verbose) opt_params_2,fopt_2 = r[0],r[1] elif method == 'fmin_bfgs': r = so.fmin_bfgs(error_function, params_2, full_output=1, disp = verbose, gtol=1e-5) opt_params_2,fopt_2 = r[0],r[1] else: raise RuntimeError('unknown method: '+method) if fopt_2<fopt_1: if rad_fix: return rad_guess,opt_params_2[0],opt_params_2[1] else: return opt_params_2[2],opt_params_2[0],opt_params_2[1] else: if rad_fix: return rad_guess,opt_params_1[0],opt_params_1[1] else: return opt_params_1[2],opt_params_1[0],opt_params_1[1]
def DCdisappFull(lowerlim=100, upperlim=9e4): x = np.linspace(lowerlim, upperlim, (upperlim-lowerlim)/10) y = vacuumOscProb('e','e',x) normlabel = r'$P_{\nu_e \rightarrow \nu_e}$ (Normal hierarchy)' def myVOP(var): return np.real(vacuumOscProb('e', 'e', var, True)) oscmax13 = optimize.fmin_bfgs(myVOP, 1000) oscmax12 = optimize.fmin_bfgs(myVOP, 16000) fig, ax = plt.subplots() ax.plot(x, y, linewidth=2, label=normlabel) ax.set_xscale('log') ax.grid() plt.axis([lowerlim, upperlim, 0, 1]) # x-axis and y-axis ranges ax.tick_params(axis='both', which='major', labelsize=14) # Tick number sizes plt.xlabel(r'$L/E\ \mathrm{[m/MeV]}$', fontsize=20) plt.ylabel(r'$\nu_e$ Survival probability', fontsize=20) # Fill between y and 1 over the range specified in the "where" statement. ax.fill_between(x,y,1,where=(x < 1050/1.3) & (x > 1050/9), facecolor='grey', alpha=0.4) ax.vlines(270, vacuumOscProb('e','e',270), 1.0, color='red', linestyle=':', linewidth=4) plt.annotate('', xy=(oscmax13,np.real(vacuumOscProb('e','e',oscmax13)) ), xycoords='data', xytext=(oscmax13,1.0), textcoords='data', arrowprops = {'arrowstyle':'<->'}) plt.annotate(r'$\theta_{13} \sim$', xy=(350,0.95), xycoords='data', xytext=(0,0), textcoords='offset points') plt.annotate('', xy=(840,0.943), xycoords='data', xytext=(1290,0.943), textcoords='data', arrowprops = {'arrowstyle':'<->'}) plt.annotate(r'$\sim \Delta m_{31}^2$', xy=(750,0.90), xycoords='data', xytext=(0,0), textcoords='offset points') plt.annotate('', xy=(oscmax12,np.real(vacuumOscProb('e','e',oscmax12))+0.03), xycoords='data', xytext=(oscmax12,1.0), textcoords='data', arrowprops = {'arrowstyle':'<->'}) plt.annotate(r'$\theta_{12} \sim$', xy=(10500,0.65), xycoords='data', xytext=(0,0), textcoords='offset points') plt.annotate('', xy=(23600,0.41), xycoords='data', xytext=(42000,0.41), textcoords='data', arrowprops = {'arrowstyle':'<->'}) plt.annotate(r'$\sim \Delta m_{21}^2$', xy=(22000,0.36), xycoords='data', xytext=(0,0), textcoords='offset points') fig.subplots_adjust(bottom=0.127) # Move up x-axis to fit label plt.show()
def scoreAGraph(G, data, x0=None):
    A, B = npG2SVAR(G)
    K = scipy.sum(abs(A) + abs(B))
    T = data.shape[1]  # number of time points, used in the BIC penalty below
    a_idx = np.where(A != 0)
    b_idx = np.where(B != 0)
    if x0 is not None:
        o = optimize.fmin_bfgs(nllf, x0, args=(A, B, data, a_idx, b_idx),
                               disp=False, full_output=True)
    else:
        o = optimize.fmin_bfgs(nllf, scipy.randn(K),
                               args=(np.double(A), np.double(B), data, a_idx, b_idx),
                               disp=False, full_output=True)
    # o[1] is the minimised negative log-likelihood
    return 2 * o[1] + K * np.log(T)  # VARbic(o[1], K, data.shape[1])
def main(): #print gradMinimize(0, np.array([[1]]), .02, SSE, SSEgrad, 0) #print gradMinimize(0, np.array([[1],[2]]), .01, SSE, SSEgrad, 1) #print gradMinimize(0.1, np.array([[1],[2],[2],[2]]), .01, SSE, SSEgrad, 3) #print gradMinimize(0.1, np.array([[1],[2],[2],[2],[2],[2],[2],[2],[2],[2]]), .01, SSE, SSEgrad, 9) #actual = max_likelihood_weight(X, Y, _phi(X, 0, basis_function_poly)) basis_function_poly = lambda x, y: math.pow(x,y) ''' for i in range(-10, 10, 1): actual = max_likelihood_weight(X, Y, _phi(X, 0, basis_function_poly)) soln = gradMinimize(0.00001, i*np.ones((1, 1)), .05, SSE, SSEgrad, 0) #print "M=0, i={}, soln={}".format(i, soln) print "M=0, i={}, error={}".format(i, np.dot((soln -actual).T, (soln -actual))) for i in range(-10, 10, 1): actual = max_likelihood_weight(X, Y, _phi(X, 1, basis_function_poly)) soln = gradMinimize(0.00001, i*np.ones((2, 1)), .07, SSE, SSEgrad, 1) #print "M=1, i={}, soln={}".format(i, soln) print "M=1, i={}, error={}".format(i, np.dot((soln -actual).T, (soln -actual))) for i in range(-10, 10, 1): actual = max_likelihood_weight(X, Y, _phi(X, 3, basis_function_poly)) soln = gradMinimize(0.00001, i*np.ones((4, 1)), .07, SSE, SSEgrad, 3) #print "M=3, i={}, soln={}".format(i, soln) print "M=3, i={}, error={}".format(i, np.dot((soln -actual).T, (soln -actual))) for i in range(-10, 10, 1): actual = max_likelihood_weight(X, Y, _phi(X, 9, basis_function_poly)) soln = gradMinimize(0.00001, i*np.ones((10, 1)), .04, SSE, SSEgrad, 9) #print "M=9, i={}, soln={}".format(i, soln) print "M=9, i={}, error={}".format(i, np.dot((soln -actual).T, (soln -actual))) regressionPlot(X, Y, 0, gradMinimize(0.00001, 0.1*np.ones((1, 1)), .05, SSE, SSEgrad, 0), basis_function) regressionPlot(X, Y, 1, gradMinimize(0.00001, 0.1*np.ones((2, 1)), .07, SSE, SSEgrad, 1), basis_function) regressionPlot(X, Y, 3, gradMinimize(0.00001, 0.1*np.ones((4, 1)), .07, SSE, SSEgrad, 3), basis_function) regressionPlot(X, Y, 9, gradMinimize(0.00001, 0.1*np.ones((10, 1)), .04, SSE, SSEgrad, 9), basis_function) ''' print gradMinimize(0.00001, 0.1*np.ones((1, 1)), .05, SSE, SSEgrad, 0) print gradMinimize(0.00001, 0.1*np.ones((2, 1)), .07, SSE, SSEgrad, 1) print gradMinimize(0.00001, 0.1*np.ones((4, 1)), .07, SSE, SSEgrad, 3) print gradMinimize(0.00001, 0.1*np.ones((10, 1)), .04, SSE, SSEgrad, 9) for i in [0,1,3,9]: print fmin_bfgs(SSEfcn(X, Y, i, basis_function), .1*np.ones((i+1, 1))) '''
def LogisticRegression():
    data = loadtxtAndcsv_data("data2.txt", ",", np.float64)
    X = data[:, 0:-1]
    y = data[:, -1]

    plot_data(X, y)  # plot the data

    X = mapFeature(X[:, 0], X[:, 1])            # map the two features to polynomial terms
    initial_theta = np.zeros((X.shape[1], 1))   # initialize theta
    initial_lambda = 0.1                        # initialize the regularization coefficient; 0.01, 0.1, 1, ... are typical

    J = costFunction(initial_theta, X, y, initial_lambda)  # cost J for the initial theta and lambda

    print J  # print the computed value; it should be 0.693147

    #result = optimize.fmin(costFunction, initial_theta, args=(X,y,initial_lambda))  # plain minimization works poorly here
    '''Call scipy's fmin_bfgs optimizer (Broyden-Fletcher-Goldfarb-Shanno quasi-Newton method):
    - costFunction is our own cost function,
    - initial_theta is the initial value,
    - fprime specifies the gradient of costFunction,
    - args passes the remaining parameters as a tuple; the theta minimizing costFunction is returned
    '''
    result = optimize.fmin_bfgs(costFunction, initial_theta, fprime=gradient,
                                args=(X, y, initial_lambda))
    p = predict(X, result)  # predict
    print 'Accuracy on the training set: %f%%' % np.mean(np.float64(p == y) * 100)
    # p == y is a boolean comparison returning True/False; np.float64 converts it to 0s and 1s

    X = data[:, 0:-1]
    y = data[:, -1]
    plotDecisionBoundary(result, X, y)  # plot the decision boundary
def regressionPlotDescentBuiltin(X, Y, order, guess):
    pl.plot(X.T.tolist()[0], Y.T.tolist()[0], 'gs')

    # You will need to write the designMatrix and regressionFit functions.
    # Construct the design matrix (Bishop 3.16); the 0th column is just 1s.
    phi = designMatrix(X, order)
    # compute the weight vector
    wo = regressionFit(X, Y, phi)
    print 'optimal w', wo

    def f(w):
        #print np.matrix(w).T
        return SSE(X, Y, order, np.matrix(w).T)

    w = fmin_bfgs(f, guess)
    w = np.matrix(w).T
    print 'descent w', w
    print SSE(X, Y, order, w)
    #print SSEDer(X,Y,order,w)
    #print num_gradient(lambda w: SSE(X,Y,order,w),w,0.001)

    # produce a plot of the values of the function
    pts = [[p] for p in pl.linspace(min(X), max(X), 100)]
    A = np.matrix(pts)
    Yp = pl.dot(w.T, designMatrix(A, order).T)
    pl.plot(pts, Yp.tolist()[0])
def oneVsAll(X,y,num_labels,lam): ndims = X.shape m = ndims[0] n = ndims[1] # the matrix of theta values for each label and each pixel all_thetas = np.zeros([num_labels,n+1]) initial_theta = np.zeros(n+1) newX = np.ones([m,n+1]) newX[:,1:] = X[:,:] # re-organizing the y array to a one-dimensional array y = y[:,0] for ii in range(num_labels): # initializing the y array to all zeros newy = np.zeros(y.size) # finding the indices that are the current digit if ii==0: digit = np.where(y==10) else: digit = np.where(y==ii) newy[digit] = 1 theta = fmin_bfgs(costFunction.computeRegularizedCost,initial_theta, fprime=costFunction.regularizedLogisiticDeriv, args=(newX,newy,lam)) all_thetas[ii,:] = theta[:] # endfor ii in range(num_labels) return all_thetas
def maximize(L, DL, D2L, x, method=None, disp=False):
    """Main function to perform numerical optimization. L, DL and D2L are the
    objective function and its derivative and Hessian, and x is the initial
    guess (current rating).

    It will attempt the maximization using four different methods, from
    fastest and least robust to slowest and most robust. It returns the
    argmax, or None if an error occurred."""
    mL = lambda x: -L(x)
    mDL = lambda x: -DL(x)
    mD2L = lambda x: -D2L(x)

    # Newton Conjugate Gradient
    if method is None or method == 'ncg':
        func = lambda x0: opt.fmin_ncg(mL, x0, fprime=mDL, fhess=mD2L,
                                       disp=disp, full_output=True, avextol=1e-10)
        xm = check_max(func, x, 5, 'NCG', disp)
        if xm is not None:
            return xm

    # Broyden-Fletcher-Goldfarb-Shanno
    if method is None or method == 'bfgs':
        func = lambda x0: opt.fmin_bfgs(mL, x0, fprime=mDL,
                                        disp=disp, full_output=True, gtol=1e-10)
        xm = check_max(func, x, 6, 'BFGS', disp)
        if xm is not None:
            return xm

    # Powell
    if method is None or method == 'powell':
        func = lambda x0: opt.fmin_powell(mL, x0, disp=disp, full_output=True, ftol=1e-10)
        xm = check_max(func, x, 5, 'POWELL', disp)
        if xm is not None:
            return xm

    # Downhill simplex (last resort)
    func = lambda x0: opt.fmin(mL, x0, disp=disp, full_output=True, ftol=1e-10)
    xm = check_max(func, x, 4, 'DOWNHILL_SIMPLEX', disp)
    return xm
def get_reps(A, R_train, lam, k): ''' Perform gradient descent to learn the parameters U, V, beta, alpha ''' # initialize U, V, beta, alpha n = A.shape[0] U0 = (np.random.rand(n,k)-0.5)*0.1 V0 = (np.random.rand(n,k)-0.5)*0.1 beta0 = (np.random.rand(n,1)-0.5)*0.1 alpha0 = (random.random()-0.5)*0.1 U0flat = np.reshape(U0, (1,U0.size)) V0flat = np.reshape(V0, (1,V0.size)) X0 = np.concatenate((U0flat, V0flat, np.transpose(beta0), np.array([[alpha0]])), axis=1)[0] args=(A, R_train, lam, n, k) def callback_logit(Xk): global Neval #print '{0:4d} {1: 3.6f}'.format(Neval, cost_logit_lowspace(Xk, args[0], args[1], args[2], args[3], args[4])) print '{0:4d} {1: 3.6f}'.format(Neval, cost_logit(Xk, args[0], args[1], args[2], args[3], args[4])) Neval += 1 print '\tminimizing with BFGS...' #Xopt = opt.fmin_bfgs(cost_logit_lowspace, X0, fprime=grad_logit_lowspace, args=args, callback=callback_logit) Xopt = opt.fmin_bfgs(cost_logit, X0, fprime=grad_logit, args=args) #print '\tminimizing with CG...' #Xopt = opt.fmin_cg(cost_logit_lowspace, X0, fprime=grad_logit_lowspace, args=args, maxiter=10, callback=callback_logit) print '\tdone.' U = Xopt[:n*k] U = np.reshape(U, (n,k)) V = Xopt[n*k:2*n*k] V = np.reshape(V, (n,k)) beta = Xopt[2*n*k:2*n*k+n] beta = np.reshape(beta, (n,1)) alpha = Xopt[-1] return U, V, beta, alpha
def maximize(L, DL, D2L, x, method=None, disp=False): mL = lambda x: -L(x) mDL = lambda x: -DL(x) mD2L = lambda x: -D2L(x) if method == None or method == 'ncg': func = lambda x0: opt.fmin_ncg(mL, x0, fprime=mDL, fhess=mD2L,\ disp=disp, full_output=True,\ avextol=1e-10) xm = check_max(func, x, 5, 'NCG', disp) if xm != None: return xm if method == None or method == 'bfgs': func = lambda x0: opt.fmin_bfgs(mL, x0, fprime=mDL,\ disp=disp, full_output=True,\ gtol=1e-10) xm = check_max(func, x, 6, 'BFGS', disp) if xm != None: return xm if method == None or method == 'powell': func = lambda x0: opt.fmin_powell(mL, x0, disp=disp, full_output=True,\ ftol=1e-10) xm = check_max(func, x, 5, 'POWELL', disp) if xm != None: return xm func = lambda x0: opt.fmin(mL, x0, disp=disp, full_output=True, ftol=1e-10) xm = check_max(func, x, 4, 'DOWNHILL_SIMPLEX', disp) return xm
def test_bfgs(self, use_wrapper=False): """ Broyden-Fletcher-Goldfarb-Shanno optimization routine """ if use_wrapper: opts = {'maxit': self.maxiter, 'disp': False} params, info = optimize.minimize(self.func, self.startparams, jac=self.grad, method='BFGS', args=(), options=opts, full_output=True, retall=False) fopt, gopt, Hopt, func_calls, grad_calls, warnflag = \ info['fun'], info['jac'], info['hess'], info['nfev'], \ info['njev'], info['status'] else: retval = optimize.fmin_bfgs(self.func, self.startparams, self.grad, args=(), maxiter=self.maxiter, full_output=True, disp=False, retall=False) (params, fopt, gopt, Hopt, func_calls, grad_calls, warnflag) = retval err = abs(self.func(params) - self.func(self.solution)) #print "BFGS: Difference is: " + str(err) assert_(err < 1e-6) # Ensure that function call counts are 'known good'; these are from # Scipy 0.7.0. Don't allow them to increase. assert_(self.funccalls == 10, self.funccalls) assert_(self.gradcalls == 8, self.gradcalls) # Ensure that the function behaves the same; this is from Scipy 0.7.0 assert_(np.allclose(self.trace[6:8], [[0, -5.25060743e-01, 4.87748473e-01], [0, -5.24885582e-01, 4.87530347e-01]], atol=1e-14, rtol=1e-7), self.trace[6:8])
def fit(self, x, yy, weights=None):
    """Train the model.

    x = (Nobs, nvars)
    y = (Nobs, )

    Bias term automatically added

    Returns the loss"""
    # transform y to vector
    if len(yy.shape) > 1:
        assert len(yy.shape) == 2 and yy.shape[1] == 1
        y = yy.reshape(-1, )
    else:
        y = yy

    def _loss_for_optimize(params):
        return LinearRegression._loss(x, y, params[0], params[1:], self.lam, weights)

    def _gradient_for_optimize(params):
        return LinearRegression._gradient_loss(x, y, params[0], params[1:], self.lam, weights)

    params_opt = fmin_bfgs(_loss_for_optimize, np.zeros(1 + x.shape[1]),
                           fprime=_gradient_for_optimize, maxiter=200)

    self.b = params_opt[0]
    self.w = params_opt[1:]

    return _loss_for_optimize(params_opt)
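# A self-contained sketch of the same pattern (ridge-regularised squared loss plus
# its analytic gradient handed to fmin_bfgs). The class helpers used above
# (_loss, _gradient_loss) are project-specific, so the names here are illustrative.
import numpy as np
from scipy.optimize import fmin_bfgs

def ridge_fit(x, y, lam=0.1):
    def loss(params):
        b, w = params[0], params[1:]
        r = x.dot(w) + b - y
        return 0.5 * r.dot(r) + 0.5 * lam * w.dot(w)
    def grad(params):
        b, w = params[0], params[1:]
        r = x.dot(w) + b - y
        return np.concatenate(([r.sum()], x.T.dot(r) + lam * w))
    return fmin_bfgs(loss, np.zeros(1 + x.shape[1]), fprime=grad, disp=False)

rng = np.random.RandomState(0)
x = rng.randn(100, 3)
y = x.dot([1.0, -2.0, 0.5]) + 0.3 + 0.01 * rng.randn(100)
print(ridge_fit(x, y))  # approximately [0.3, 1.0, -2.0, 0.5]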
def test_bfgs(self): # Broyden-Fletcher-Goldfarb-Shanno optimization routine if self.use_wrapper: opts = {'maxiter': self.maxiter, 'disp': self.disp, 'return_all': False} res = optimize.minimize(self.func, self.startparams, jac=self.grad, method='BFGS', args=(), options=opts) params, fopt, gopt, Hopt, func_calls, grad_calls, warnflag = ( res['x'], res['fun'], res['jac'], res['hess_inv'], res['nfev'], res['njev'], res['status']) else: retval = optimize.fmin_bfgs(self.func, self.startparams, self.grad, args=(), maxiter=self.maxiter, full_output=True, disp=self.disp, retall=False) (params, fopt, gopt, Hopt, func_calls, grad_calls, warnflag) = retval assert_allclose(self.func(params), self.func(self.solution), atol=1e-6) # Ensure that function call counts are 'known good'; these are from # Scipy 0.7.0. Don't allow them to increase. assert_(self.funccalls == 10, self.funccalls) assert_(self.gradcalls == 8, self.gradcalls) # Ensure that the function behaves the same; this is from Scipy 0.7.0 assert_allclose(self.trace[6:8], [[0, -5.25060743e-01, 4.87748473e-01], [0, -5.24885582e-01, 4.87530347e-01]], atol=1e-14, rtol=1e-7)
def _fit_bfgs(f, score, start_params, fargs, kwargs, disp=True, maxiter=100,
              callback=None, retall=False, full_output=True, hess=None):
    gtol = kwargs.setdefault('gtol', 1.0000000000000001e-05)
    norm = kwargs.setdefault('norm', np.Inf)
    epsilon = kwargs.setdefault('epsilon', 1.4901161193847656e-08)
    retvals = optimize.fmin_bfgs(f, start_params, score, args=fargs, gtol=gtol,
                                 norm=norm, epsilon=epsilon, maxiter=maxiter,
                                 full_output=full_output, disp=disp,
                                 retall=retall, callback=callback)
    if full_output:
        if not retall:
            xopt, fopt, gopt, Hinv, fcalls, gcalls, warnflag = retvals
        else:
            (xopt, fopt, gopt, Hinv, fcalls, gcalls, warnflag, allvecs) = retvals
        converged = not warnflag
        retvals = {'fopt': fopt, 'gopt': gopt, 'Hinv': Hinv, 'fcalls': fcalls,
                   'gcalls': gcalls, 'warnflag': warnflag,
                   'converged': converged}
        if retall:
            retvals.update({'allvecs': allvecs})
    else:
        # without full_output, fmin_bfgs returns only the solution vector
        xopt = retvals
        retvals = None

    return xopt, retvals
def data2AB(data, x0=None): n = data.shape[0] T = data.shape[1] YY = np.dot(data[:, 1:], data[:, 1:].T) XX = np.dot(data[:, :-1], data[:, :-1].T) YX = np.dot(data[:, 1:], data[:, :-1].T) model = VAR(data.T) r = model.fit(1) A = r.coefs[0,:,:] # A = np.ones((n,n)) B = np.ones((n, n)) np.fill_diagonal(B, 0) B[np.triu_indices(n)] = 0 K = np.int(scipy.sum(abs(B)))#abs(A)+abs(B))) a_idx = np.where(A != 0) b_idx = np.where(B != 0) np.fill_diagonal(B, 1) try: s = x0.shape x = x0 except AttributeError: x = np.r_[A.flatten(), 0.1*scipy.randn(K)] o = optimize.fmin_bfgs(nllf2, x, args=(np.double(A), np.double(B), YY, XX, YX, T, a_idx, b_idx), gtol=1e-12, maxiter=500, disp=False, full_output=True) A, B = x2M(o[0], np.double(A), np.double(B), a_idx, b_idx) B = B+B.T return A, B
def get_starting_position(self, num_walkers=1, squeeze=True): """ Returns a potential starting position for a sampler. Parameters ---------- num_walkers : int, optional If given, will generate multiple starting points for samplers that require it. squeeze : bool, optional If only one starting position is requested (as is default), squeeze will use ``np.squeeze`` to condense the 2D array of shape ``(1, len(theta))`` to a 1D array of size ``len(theta)``. """ num_dim = len(self._theta_names) self.logger.debug("Generating starting guesses") p0 = self._get_suggestion() sigmas = self._get_suggestion_sigma() self.logger.debug("Initial position is: %s" % p0) if len(p0) < 2: optimised = fmin_bfgs(self._get_negative_log_posterior, p0, disp=0) else: optimised = p0 self.logger.debug("Optimised position is: %s" % optimised) std = np.random.uniform(low=-1, high=1, size=(num_walkers, num_dim)) * \ np.array(sigmas).reshape((1, -1)) start = optimised + std if squeeze and num_walkers == 1: return start.squeeze(axis=0) else: return start
def train(self, trainset):
    c = lambda t: self.cost(trainset, t)
    d = lambda t: self._derivatives(trainset, t)

    initial_theta = ut.matrices_to_vector([l._theta for l in self._layers])
    c(initial_theta)

    optimum_theta = opt.fmin_bfgs(c, initial_theta, fprime=d, disp=False)
    print optimum_theta
def train(self): from scipy.optimize import fmin_bfgs Y = self.Y_train X = self.X_train n_samples = self.Y_train.shape[0] K = np.zeros((n_samples, n_samples)) for i in range(n_samples): for j in range(n_samples): K[i,j] = self.kernel(X[i], X[j]) self.K = K args = [1e-4]*(n_samples+1) #args = np.ones((n_samples+1))*.001 solution = fmin_bfgs(self.NLL, args, maxiter=2) self.alphas = solution[0:-1] #print self.alphas self.w_0 = solution[-1] self.calc_w_0() #print self.w_0 num_alphas = 0 for a in self.alphas: if a > 1e-5: num_alphas += 1 print 'num_alphas' ,num_alphas
def main(): import time times = [] algor = [] x0 = [0.8, 1.2, 0.7] print "BFGS Quasi-Newton" print "=================" start = time.time() x = optimize.fmin_bfgs(optimize.rosen, x0, fprime=optimize.rosen_der, maxiter=80) print x times.append(time.time() - start) algor.append('BFGS Quasi-Newton\t') print "OWLQN" print "=================" start = time.time() x = fmin_owlqn(optimize.rosen, x0, fprime=optimize.rosen_der, maxiter=80) print x times.append(time.time() - start) algor.append('OWLQN\t\t\t') print print "\nMinimizing the Rosenbrock function of order 3\n" print " Algorithm \t\t\t Seconds" print "===========\t\t\t =========" for k in range(len(algor)): print algor[k], "\t -- ", times[k]
def nfp(d, b, guess, stateMax, stateInt, stateNum): ''' Rust's Nested Fixed Point algorithm ''' cols = ['ident', 'time', 'x', 'i'] d.columns = cols di = d.i dx = d.x dx = dx / stateInt dt = d.time theta = guess dx = dx.diff() dx = dx * (1-di) dx = dx * (dt != 0) p = first_step(dx, stateNum) tol = 1e-8; maxIter = 1000; dif = 1; iterNum = 0 # Iteration bounds while dif > tol and iterNum < maxIter: params = [[b], theta, p] params = [item for sublist in params for item in sublist] EV = val_iter(params, stateMax, stateInt, stateNum) result = fmin_bfgs(log_l, theta, args=(b, dx, d.i, EV), maxiter=1, disp=0, full_output=True) theta = result[0] dif = max(abs(result[2])) # Jacobian evaluated at parameters iterNum +=1 result = [theta.tolist(), p] result = [item for sublist in result for item in sublist] return result
def test(): img = skimage.img_as_float(data.lena()) img_size = img.shape[:2] trans = get_transform(20,15,1.05, 0.02, img_size) img_transformed = transform.warp(img, trans) obj_func = lambda x: transform_and_compare(img_transformed, img, x) x0 = np.array([0,0,1, 0]) results = optimize.fmin_bfgs(obj_func, x0) transform_estimated = get_simple_transform(results) transform_optimal = transform.AffineTransform(np.linalg.inv(trans._matrix)) params_optimal = np.concatenate([transform_optimal.translation, transform_optimal.scale[0:1], [transform_optimal.rotation]]) img_registered = transform.warp(img_transformed, transform_estimated) err_original = mean_sq_diff(img_transformed, img) err_optimal = transform_and_compare(img_transformed, img, params_optimal) err_actual = transform_and_compare(img_transformed, img, results) err_relative = err_optimal/err_original print "Params optimal:", params_optimal print "Params estimated:", results print "Error without registration:", err_original print "Error of optimal registration:", err_optimal print "Error of estimated transformation %f (%.2f %% of intial)" % (err_actual, err_relative*100.) plt.figure() plt.subplot(121) plt.imshow(img_transformed) plt.subplot(122) plt.imshow(img_registered)
def fit_full_curve(self): # main optimization if self.classic_curve_fitted==True: #R_T_init = (1.0/(self.plsq[0][0]*self.N)) # OLD # 23.05.14 ADM R_T_init = (1.0/(self.plsq[0][0])) # NEW # tunnel resistance of the single junction C_sigma_init=e**2/(self.plsq[0][2]*k)*1e15 T_p_init = self.plsq[0][1]*1e3 else: R_T_init = self.R_tunnel_init C_sigma_init = e**2/(self.TEC_init*k)*1e15 T_p_init = self.T_init*1e3 island_volume_init = self.island_size x1=[R_T_init,C_sigma_init,T_p_init] if self.bounds == None: self.xopt1 = optimize.fmin_bfgs(self.optimize_1, x1, gtol=1e-3,full_output=1, disp=1,callback=self.call_func) else: "Print optimizing with bounds" self.xopt1 = optimize.fmin_l_bfgs_b(self.optimize_1, x1, factr=1e7, approx_grad=True, bounds=self.bounds) toc = time.clock() print "==========================================" print "====== After main optimization: ======" print "==========================================" # 28.05.14 ADM print "R_T = %g"%(self.xopt1[0][0]) print "T = %g mK"%(self.xopt1[0][2]) print "C_sigma = %g "%(self.xopt1[0][1]) self.full_curve_fitted = True self.T_fit = self.xopt1[0][2] self.R_T = self.xopt1[0][0] self.C_sigma = self.xopt1[0][1]
def logistic_regression():
    data = loadFile(
        'C:/Users/Administrator/Desktop/ng_ML_jobs/wuendaex2/ex2data2.txt')
    theta, X, y = process_data(data)
    learningRate = 0.01
    res = optimize.fmin_bfgs(cg.costFunction, theta, fprime=cg.gradient,
                             args=(X, y, learningRate))
    p = predict(X, res)  # 10. call the prediction function; res is the optimal theta found above
    print('Optimal theta:', res)
    print('Training accuracy: %f%%' % np.mean(np.float64(p == y) * 100))
    # p == y is a boolean comparison returning an n-by-1 array of True/False;
    # np.float64 converts it to an array of 0s and 1s.
    X = data[:, 0:-1]  # note: redefine X as the raw two-feature array; the mapped X already has six features
    # write the result to a file so the plot can be reproduced directly next time
    path = 'C:/Users/Administrator/Desktop/ng_ML_jobs/wuenda2/model/trained_data.txt'
    cg.filein(path, res)
    # plotting
    plotBoundry(X, y, res)  # 11. draw the decision boundary using the optimal theta
    return
def oneVsAll(X, y, num_labels, Lambda):
    m, n = X.shape
    all_theta = np.zeros((n + 1, num_labels))  # one column of theta per class, 10 classes in total
    X = np.hstack((np.ones((m, 1)), X))
    class_y = np.zeros((m, num_labels))
    theta = np.zeros((n + 1, 1))  # initialize theta for one class

    # map y to 0/1 indicator columns
    for i in range(num_labels):
        class_y[:, i] = np.int32(y == i).reshape(1, -1)

    '''loop over every class and compute the corresponding theta'''
    for i in range(num_labels):
        result_theta = optimize.fmin_bfgs(costFunction, theta, fprime=gradient,
                                          args=(X, class_y[:, i], Lambda))
        all_theta[:, i] = result_theta.reshape(1, -1)

    all_theta = np.transpose(all_theta)
    return all_theta
def calibrate_least_squares(ref_mean, sensor_mean):
    ref_mean = np.array(ref_mean)
    length = min(ref_mean.shape[0], sensor_mean.shape[0])
    ref_mean = ref_mean[:length]
    sensor_mean = sensor_mean[:length]

    def error_function(params):
        m, c = params[0], params[1]
        sensor_predict = m * ref_mean + c
        err = (sensor_predict - sensor_mean)
        return np.sum((err * err) * np.abs(ref_mean))
        #return np.sum(err * err)

    params = [1., 0.]
    r = so.fmin_bfgs(error_function, params, full_output=1, disp=False, gtol=1e-5)
    print 'Optimization result:', r[0]
def run(self):
    # the actual optimization function
    output = opt.fmin_bfgs(
        self.f,
        self.x0(),
        fprime=self.fprime,
        # args=(),
        gtol=self.fmax * 0.1,  # Should never be reached
        norm=np.inf,
        #epsilon=1.4901161193847656e-08,
        maxiter=self.steps,
        full_output=1,
        disp=0,
        # retall=0,
        callback=self.callback)

    warnflag = output[-1]
    if warnflag == 2:
        print(
            'Desired error not necessarily achieved (due to precision loss)'
        )
def test_bfgs_infinite(self):
    # Test corner case where -Inf is the minimum.  See gh-2019.
    func = lambda x: -np.e**-x
    fprime = lambda x: -func(x)
    x0 = [0]
    olderr = np.seterr(over='ignore')
    try:
        if self.use_wrapper:
            opts = {'disp': self.disp}
            x = optimize.minimize(func, x0, jac=fprime, method='BFGS',
                                  args=(), options=opts)['x']
        else:
            x = optimize.fmin_bfgs(func, x0, fprime, disp=self.disp)
        assert_(not np.isfinite(func(x)))
    finally:
        np.seterr(**olderr)
def testScipy():
    # Initial guess
    x = 100.0
    y = 3.0
    step = 0.1

    result = fmin_bfgs(quadraticBowl, x0=[x, y], epsilon=step,
                       full_output=True, disp=False)
    # unpacking
    xopt = result[0]
    fopt = result[1]
    calls = result[4]

    print "BFGS"
    print "Number of function calls : %d" % calls
    print("goal:{}\t coord:({},{})".format(fopt, xopt[0], xopt[1]))
def best_params_huber(t, y, dy, omega, Nterms=1, compute_offset=False,
                      c=3, return_fmin=False):
    theta_guess = best_params(t, y, dy, omega, Nterms, compute_offset)
    X = construct_X(t, 1, omega, Nterms, compute_offset)
    res = optimize.fmin_bfgs(huber_loss, theta_guess,
                             full_output=True, disp=False,
                             args=(y, X, dy, c))
    if return_fmin:
        return res[:2]
    else:
        return res[0]
def optimize_mixture(K, pars, model, max_radius, log10_squared_deviation, badness_fn):
    lnpars = np.log(pars)
    newlnpars = op.fmin_powell(badness_fn, lnpars,
                               args=(model, max_radius, log10_squared_deviation),
                               maxfun=16384 * 2)
    lnpars = 1. * newlnpars
    newlnpars = op.fmin_bfgs(badness_fn, lnpars,
                             args=(model, max_radius, log10_squared_deviation),
                             maxiter=128 * 2)
    lnpars = 1. * newlnpars
    newlnpars = op.fmin_cg(badness_fn, lnpars,
                           args=(model, max_radius, log10_squared_deviation),
                           maxiter=128 * 2)
    return (badness_fn(newlnpars, model, max_radius, log10_squared_deviation),
            np.exp(newlnpars))
def _fit_bfgs(self, X, y, X_val, Y_val, activations, deltas, coef_grads, intercept_grads, layer_units): # Store meta information for the parameters self._coef_indptr = [] self._intercept_indptr = [] start = 0 # Save sizes and indices of coefficients for faster unpacking for i in range(self.n_layers_ - 1): n_fan_in, n_fan_out = layer_units[i], layer_units[i + 1] end = start + (n_fan_in * n_fan_out) self._coef_indptr.append((start, end, (n_fan_in, n_fan_out))) start = end # Save sizes and indices of intercepts for faster unpacking for i in range(self.n_layers_ - 1): end = start + layer_units[i + 1] self._intercept_indptr.append((start, end)) start = end # Run BFGS packed_coef_inter = _pack(self.coefs_, self.intercepts_) if self.verbose is True or self.verbose >= 1: iprint = 1 else: iprint = -1 optimal_parameters, self.loss_, d, Bopt, func_calls, grad_calls, warnflag = \ optimize.fmin_bfgs(x0=packed_coef_inter, f=self._loss_func, fprime=self._grad_func, maxiter=self.max_iter, disp=False, gtol=self.tol, args=(X, y, activations, deltas, coef_grads, intercept_grads), full_output=True, callback=self._callback) self._unpack(optimal_parameters)
def testIsotropicGaussianKernelHyperparameterLearning(self): hyper = array([1.5, 1.1]) gkernel = SVGaussianKernel_iso(hyper) # marginal likelihood and likelihood gradient # # in MATLAB: # [nlml dnlml] = gpr(log([1.5, 1.1])', 'covSEiso', # [.5, .1, .3; .9, 1.2, .1; .55, .234, .1; .234, .547, .675] , # [.5, 1, .5, 2]') margl, marglderiv = marginalLikelihood(gkernel, self.X, self.Y, len(hyper), True) self.assertAlmostEqual(margl, 7.514, 2) for v, t in zip(marglderiv, [11.4659, -10.0714]): self.assertAlmostEqual(v, t, 2) # compare partial derivatives with result from Rasmussen's code target0 = matrix( '[0 .5543 .0321 .2018; .5543 0 .449 .4945; .0321 .449 0 .2527; .2018 .4945 .2527 0]' ) target1 = matrix( '[2.42 1.769 2.3877 2.2087; 1.769 2.42 1.914 1.8533; 2.3877 1.914 2.42 2.1519; 2.2087 1.8533 2.1519 2.42]' ) pder0 = gkernel.derivative(self.X, 0) pder1 = gkernel.derivative(self.X, 1) for i, (target, pder) in enumerate([(target0, pder0), (target1, pder1)]): for j in xrange(4): self.assertAlmostEqual(target[i, j], pder[i, j], 2) # optimize the marginal likelihood over the log hyperparameters # using BFGS argmin = optimize.fmin_bfgs( nlml, log(hyper), dnlml, args=[SVGaussianKernel_iso, self.X, self.Y], disp=False) for d, t in zip(argmin, [-0.0893, 0.29]): self.assertAlmostEqual(d, t, 2)
def fit_spec_poly5(xData, yData, dyData, order=5):
    xData = np.array(xData, dtype='f8')
    yData = np.array(yData, dtype='f8')

    # Estimate starting coefficients
    C1 = nanmean(np.diff(yData)) / nanmedian(np.diff(xData))
    ind = int(np.median(np.where(~np.isnan(yData))))
    C0 = yData[ind] - (C1 * xData[ind])
    if order < 1:
        order = 1
    p0 = [0.0, 0.0, 0.0, 0.0, C1, C0]

    # Set the order
    p0 = p0[(-order - 1):]

    def chisq(p, x, y):
        return np.sum(((poly5(p)(x) - y) / dyData) ** 2.0)

    # Use fmin_bfgs to perform the fit
    return op.fmin_bfgs(chisq, p0, args=(xData, yData), full_output=1)
def opt(self, x_init, f_fp=None, f=None, fp=None):
    """
    Run the optimizer
    """
    rcstrings = ['', 'Maximum number of iterations exceeded',
                 'Gradient and/or function calls not changing']

    opt_dict = {}
    if self.xtol is not None:
        print("WARNING: bfgs doesn't have an xtol arg, so I'm going to ignore it")
    if self.ftol is not None:
        print("WARNING: bfgs doesn't have an ftol arg, so I'm going to ignore it")
    if self.gtol is not None:
        opt_dict['gtol'] = self.gtol

    opt_result = optimize.fmin_bfgs(f, x_init, fp, disp=self.messages,
                                    maxiter=self.max_iters, full_output=True,
                                    **opt_dict)
    self.x_opt = opt_result[0]
    self.f_opt = f_fp(self.x_opt)[0]
    self.funct_eval = opt_result[4]
    self.status = rcstrings[opt_result[6]]
def f(R):
    for i in range(R.shape[1]):
        print(str(i) + ' of ' + str(R.shape[1]))
        Q = R[:, i]
        if isnan(sum(Q)):
            x = array([0.25, 0.25, 0.25])
            z = array([NaN, NaN, NaN])
        else:
            x = array([0.25, 0.25, 0.25])
            z = opti.fmin_bfgs(my_cost, x, args=(Q, r), full_output=False,
                               disp=False, retall=False, gtol=0.01)
        Z_opti[:, i] = z
        f_orig[i] = dot((dot(r, x) - Q).transpose(), (dot(r, x) - Q))
        f_neu[i] = dot((dot(r, z) - Q).transpose(), (dot(r, z) - Q))
        residuals[i] = sum(z)
    return Z_opti
def _sigmoid_calibration(X, y, T1=None, tol=1e-3):
    if X.ndim == 1:
        X = X.reshape(-1, 1)

    prior0 = float(np.sum(y <= 0))
    prior1 = y.shape[0] - prior0

    if T1 is None:
        T = np.zeros(y.shape)
        T[y <= 0] = (prior1 + 1.) / (prior1 + 2.)
        T[y > 0] = 1. / (prior0 + 2.)
        T1 = 1. - T
    else:
        T = 1. - T1

    def objective(AB):
        tmp = 0
        for i in range(X.shape[1]):
            tmp += AB[i] * X[:, i]
        tmp += AB[X.shape[1]]
        #P = expit(-(AB[0] * X + AB[1]))
        P = expit(-(tmp))
        loss = -(xlogy(T, P) + xlogy(T1, 1. - P))
        return loss.sum()

    def grad(AB):
        # gradient of the objective function
        tmp = 0
        for i in range(X.shape[1]):
            tmp += AB[i] * X[:, i]
        tmp += AB[X.shape[1]]
        #P = expit(-(AB[0] * X + AB[1]))
        P = expit(-(tmp))
        TEP_minus_T1P = T - P
        dA = np.dot(TEP_minus_T1P, X)
        dB = np.sum(TEP_minus_T1P)
        out_grad = np.append(dA, dB)
        return out_grad  # np.array([dA, dB])

    AB0 = np.array([0.] * X.shape[1] + [log((prior0 + 1.) / (prior1 + 1.))])
    AB_ = fmin_bfgs(objective, AB0, fprime=grad, disp=False, gtol=tol)
    return AB_[0:-1], AB_[-1]
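# Usage sketch for the multi-feature Platt-style calibration above, assuming the
# function and its imports (numpy as np, expit, xlogy, log, fmin_bfgs) are in
# scope; the data here is synthetic and purely illustrative.
import numpy as np

rng = np.random.RandomState(0)
scores = rng.randn(200, 1)                                        # decision-function outputs
labels = (scores[:, 0] + 0.3 * rng.randn(200) > 0).astype(int)    # 0/1 targets
A, B = _sigmoid_calibration(scores, labels)
print("slope(s):", A, "intercept:", B)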
def _solve(self, x0: np.ndarray = None): if x0 is None: x0 = np.zeros(self.model.n_coeffs, dtype=self.dtype) obj = self.objective(x0) # A closure to maintain history along internal BFGS's iterations n_iter = [0] prev_x = x0.copy() prev_obj = [obj] def insp(xk): x = xk rel_delta = relative_distance(x, prev_x) prev_x[:] = x obj = self.objective(x) rel_obj = abs(obj - prev_obj[0]) / abs(prev_obj[0]) prev_obj[0] = obj self._handle_history(n_iter[0], force=False, obj=obj, x=xk.copy(), rel_delta=rel_delta, rel_obj=rel_obj) n_iter[0] += 1 insp.n_iter = n_iter insp.self = self insp.prev_x = prev_x insp.prev_obj = prev_obj # We simply call the scipy.optimize.fmin_bfgs routine x_min, f_min, _, _, _, _, _ = \ fmin_bfgs(lambda x: self.model.loss(x) + self.prox.value(x), x0, lambda x: self.model.grad(x) + self._prox_grad(x), maxiter=self.max_iter, gtol=self.tol, callback=insp, full_output=True, disp=False, retall=False) return x_min
def hm(d, b, guess, stateMax, stateInt, stateNum, T=10): ''' Hotz and Miller's CCP method ''' cols = ['ident', 'time', 'x', 'i'] d.columns = cols di = d.i dx = d.x px = d.x dx = dx / stateInt px = px / stateInt dx[0] = 0 px[0] = 0 dt = d.time theta = guess dx = dx.diff() dx = dx * (1 - di) dx = dx * (dt != 0) p = first_step(dx, stateNum) ccp = ccp_est(px, di, stateMax, stateInt) # Better way to deal with pr=0? ccp[ccp == 0] = min(ccp[ccp != 0]) / 4 vtilde = np.log(ccp) - np.log(1 - ccp) eOne = 0.57721 - np.log(np.exp(vtilde) / (1 + np.exp(vtilde))) eZero = 0.57721 - np.log(1 / (1 + np.exp(vtilde))) eEst = np.c_[eZero, eOne].T r = np.arange(stateMax / stateInt) result = fmin_bfgs(hm_log_l, theta, args=(px, di, r, eEst, p, ccp, b, T), maxiter=1, disp=0, full_output=True) theta = result[0] result = [theta.tolist(), p] result = [item for sublist in result for item in sublist] return result
def test_bfgs(self, use_wrapper=False): """ Broyden-Fletcher-Goldfarb-Shanno optimization routine """ if use_wrapper: opts = {'maxit': self.maxiter, 'disp': False, 'return_all': False} res = optimize.minimize(self.func, self.startparams, jac=self.grad, method='BFGS', args=(), options=opts) params, fopt, gopt, Hopt, func_calls, grad_calls, warnflag = \ res['x'], res['fun'], res['jac'], res['hess'], \ res['nfev'], res['njev'], res['status'] else: retval = optimize.fmin_bfgs(self.func, self.startparams, self.grad, args=(), maxiter=self.maxiter, full_output=True, disp=False, retall=False) (params, fopt, gopt, Hopt, func_calls, grad_calls, warnflag) = retval assert_allclose(self.func(params), self.func(self.solution), atol=1e-6) # Ensure that function call counts are 'known good'; these are from # Scipy 0.7.0. Don't allow them to increase. assert_(self.funccalls == 10, self.funccalls) assert_(self.gradcalls == 8, self.gradcalls) # Ensure that the function behaves the same; this is from Scipy 0.7.0 assert_allclose(self.trace[6:8], [[0, -5.25060743e-01, 4.87748473e-01], [0, -5.24885582e-01, 4.87530347e-01]], atol=1e-14, rtol=1e-7)
def test_hcp(testdir): a0 = 3.52 / np.sqrt(2) c0 = np.sqrt(8 / 3.0) * a0 print('%.4f %.3f' % (a0, c0 / a0)) for i in range(3): with Trajectory('Ni.traj', 'w') as traj: eps = 0.01 for a in a0 * np.linspace(1 - eps, 1 + eps, 4): for c in c0 * np.linspace(1 - eps, 1 + eps, 4): ni = bulk('Ni', 'hcp', a=a, covera=c / a) ni.calc = EMT() ni.get_potential_energy() traj.write(ni) configs = read('Ni.traj', index=':') energies = [config.get_potential_energy() for config in configs] ac = [(config.cell[0, 0], config.cell[2, 2]) for config in configs] p = polyfit(ac, energies, 2) a0, c0 = fmin_bfgs(p, (a0, c0)) print('%.4f %.3f' % (a0, c0 / a0)) assert abs(a0 - 2.466) < 0.001 assert abs(c0 / a0 - 1.632) < 0.005
def fit_full_curve(self): # main optimization if self.classic_curve_fitted == True: #R_T_init = (1.0/(self.plsq[0][0]*self.N)) # OLD # 23.05.14 ADM R_T_init = (1.0 / (self.plsq[0][0]) ) # NEW # tunnel resistance of the single junction C_sigma_init = e**2 / (self.plsq[0][2] * k) * 1e15 T_p_init = self.plsq[0][1] * 1e3 else: R_T_init = self.R_tunnel_init C_sigma_init = e**2 / (self.TEC_init * k) * 1e15 T_p_init = self.T_init * 1e3 island_volume_init = self.island_size x1 = [R_T_init, C_sigma_init, T_p_init] if self.bounds == None: self.xopt1 = optimize.fmin_bfgs(self.optimize_1, x1, gtol=1e-3, full_output=1, disp=1, callback=self.call_func) else: "Print optimizing with bounds" self.xopt1 = optimize.fmin_l_bfgs_b(self.optimize_1, x1, factr=1e7, approx_grad=True, bounds=self.bounds) toc = time.clock() print "==========================================" print "====== After main optimization: ======" print "==========================================" # 28.05.14 ADM print "R_T = %g" % (self.xopt1[0][0]) print "T = %g mK" % (self.xopt1[0][2]) print "C_sigma = %g " % (self.xopt1[0][1]) self.full_curve_fitted = True self.T_fit = self.xopt1[0][2] self.R_T = self.xopt1[0][0] self.C_sigma = self.xopt1[0][1]
def __init__(self, kernel, bounds, NX, noise=0.05, xstar=None, **kwargs): super(Synthetic, self).__init__("Synthetic", 0, None, bounds, **kwargs) self.name += ' %d'%len(bounds) self.GP = GaussianProcess(kernel) X = lhcSample(bounds, NX) self.GP.addData([X[0]], [normal(0, 1)]) if xstar is not None: ystar = min(self.GP.Y[0]-1.0, -2.0) self.GP.addData(xstar, ystar) for x in X[1:]: mu, sig2 = self.GP.posterior(x) y = normal(mu, sqrt(sig2)) + normal(0, noise) # preserve min if necessary if xstar is not None and y < ystar+.5: y = ystar+.5 self.GP.addData(x, y) # now, try minimizing with BFGS start = self.GP.X[argmin(self.GP.Y)] xopt = fmin_bfgs(self.GP.mu, start, disp=False) print "\t[synthetic] optimization started at %s, ended at %s" % (start, xopt) if xstar is not None: print '\t[synthetic] realigning minimum' # now, align minimum with what we specified for i, (target, origin) in enumerate(zip(xstar, xopt)): self.GP.X[:,i] += target-origin xopt = xstar self.minimum = self.GP.mu(xopt) self.xstar = xopt # print self.GP.X # print self.GP.Y print '\t[synthetic] x+ = %s, f(x+) = %.3f' % (self.xstar, self.f(self.xstar))
def test7():
    f = lambda x: x**2 + 20 * sin(x)
    x1 = linspace(-10, 10, 100)
    xopt = optimize.fmin_bfgs(f, 7)
    xmin = xopt[0]
    ymin = f(xmin)

    # basinhopping: global minimum
    ret = optimize.basinhopping(f, 0)
    # print(ret)

    # fminbound: local optimum
    s3 = optimize.fminbound(f, -10, -5)
    print(s3)

    # x1min = ret[0]
    # y1min = f(x1min)
    subplot(121)
    plot(x1, f(x1), xmin, ymin, 'r*')
    subplot(122)
def main():
    # read in the product test data
    fp = open('ex2data2.txt', 'r')
    products = []
    for line in fp:
        row = line.strip().split(',')
        products.append([float(row[0]), float(row[1]), int(row[2])])
    dfs = pd.DataFrame(products)
    dfs.columns = ['Para1', 'Para2', 'pa']

    # create y array and x matrix
    ydata = np.array(dfs['pa'])
    xdata = np.asmatrix([np.array(dfs['Para1']), np.array(dfs['Para2'])])
    xdata = xdata.transpose()

    # perform base expansion
    xdata = baseExapnsion(xdata, 6)

    # regularization parameter
    lam = 0.1

    # use the built-in optimization function to calculate beta
    # beta initial guess
    betaInit = [0] * int(np.shape(xdata)[1])
    """
    betaOpt = fmin_bfgs(functools.partial(logisticCost, ydata, xdata, False, lam),
                        betaInit,
                        fprime=functools.partial(diff, ydata, xdata, False, lam))
    """
    betaInit.append(0)
    betaOptInt = fmin_bfgs(functools.partial(logisticCost, ydata, xdata, True, lam),
                           betaInit,
                           fprime=functools.partial(diff, ydata, xdata, True, lam))

    """ scatterPlot(dfs, betaOpt, False) """
    scatterPlot(dfs, betaOptInt, True)
def oneVsAll(X, y, num_labels, Lambda):
    # initialize variables
    m, n = X.shape
    all_theta = np.zeros((n + 1, num_labels))   # one column of theta per class, 10 columns in total
    X = np.hstack((np.ones((m, 1)), X))         # prepend a column of ones to X as the bias term
    class_y = np.zeros((m, num_labels))         # y holds the digits 0-9 and must be mapped to 0/1 indicators
    initial_theta = np.zeros((n + 1, 1))        # initialize theta for a single class

    # map y
    for i in range(num_labels):
        class_y[:, i] = np.int32(y == i).reshape(1, -1)  # note: reshape(1,-1) is required for the assignment

    #np.savetxt("class_y.csv", class_y[0:600,:], delimiter=',')

    '''loop over every class and compute the corresponding theta'''
    for i in range(num_labels):
        #optimize.fmin_cg
        result = optimize.fmin_bfgs(costFunction, initial_theta, fprime=gradient,
                                    args=(X, class_y[:, i], Lambda))  # call the gradient-based optimizer
        all_theta[:, i] = result.reshape(1, -1)  # store in all_theta

    all_theta = np.transpose(all_theta)
    return all_theta
def estimate(self, dat): """ Abstract method that which should be implemented by the children of Distribution. It should provide the functionality to estimate the primary parameters of the distribution from data. If not implemented it tries to use primary2array, array2primary, primaryBounds, and dldtheta to perform a gradient ascent on the log-likelihood. However, note that the parameters obtained are not checked against the particular assumptions for the more specialized distributions. For example, if parameters represent a matrix, which is assumed to be positive definite, this is not accounted for within this general purpose gradient based maximum likelihood. Therefore, if used, a warning is printed. :param dat: data from which the parameters will be estimated :type dat: natter.DataModule.Data """ warningmsg = """Warning: You are using a general purpose (gradient descend)\ fitting procedure. No checking of parameters done, make sure that final\ parameters are within allowed range (such as positive definiteness).""" warn(warningmsg) f = lambda p: self.array2primary(p).all(dat) fprime = lambda p: -mean(self.array2primary(p).dldtheta(dat), 1) / log( 2) / dat.size(0) noboundmethod = False try: tmp = fmin_l_bfgs_b(f, self.primary2array(), fprime, bounds=self.primaryBounds(), factr=10.0)[0] except Errors.AbstractError: noboundmethod = True if noboundmethod: tmp = fmin_bfgs(f, self.primary2array(), fprime)[0] self.array2primary(tmp)
def learningCurve(X, y, X_cv, y_cv): global init_theta, lambda_val m = X.shape[0] error_train = zeros((m, 1)) error_val = zeros((m, 1)) for i in range(1, m): theta = opt.fmin_bfgs(cost, init_theta, fprime=grad, args=(X[0:i, :], y[0:i, :], lambda_val)) h_train = sigmoid(X[0:i, :].dot(theta)) error_train[i] = -1 / (1000 * i) * sum(y[0:i, :] * log(h_train) + (1 - y[0:i, :]) * log(1 - h_train)) h_val = sigmoid(X_cv.dot(theta)) error_val[i] = -1 / (1000 * i) * sum(y[0:i, :] * log(h_val) + (1 - y[0:i, :]) * log(1 - h_val)) return error_train, error_val
def BFGS(X, Y, regularization=0):
    """
    Logistic regression with BFGS optimization and ridge regression.

    Args:
        X (ndarray): a 2D array of features for training data, where each row
            is an observation and the columns are features.
        Y (array): an array of known values corresponding to each row in X.
        regularization (float): what proportion of the L2 norm of the weights
            to include in the cost function (default: 0.0).

    Returns:
        weights (array): the coefficients produced by the algorithm.
    """
    X_norm, mean, std = normalize(X)
    X_norm = insert_ones(X_norm)
    initial_weights = initialize_weights(X_norm)
    normed_weights = fmin_bfgs(cost, initial_weights, fprime=gradient,
                               args=(X_norm, Y, regularization))
    weights = denormalize_weights(normed_weights, mean, std)
    return weights
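# A self-contained sketch of the cost/gradient pair such a wrapper needs, assuming
# plain numpy arrays; `cost`, `gradient`, and the normalization helpers above are
# project-specific, so the names below are illustrative only.
import numpy as np
from scipy.optimize import fmin_bfgs

def logistic_cost(w, X, Y, reg):
    z = X.dot(w)
    # log(1 + exp(z)) - Y*z is the negative log-likelihood for 0/1 labels
    return np.sum(np.logaddexp(0, z) - Y * z) + 0.5 * reg * w.dot(w)

def logistic_gradient(w, X, Y, reg):
    p = 1.0 / (1.0 + np.exp(-X.dot(w)))
    return X.T.dot(p - Y) + reg * w

rng = np.random.RandomState(1)
X = np.c_[np.ones(100), rng.randn(100, 2)]
Y = (X[:, 1] - X[:, 2] > 0).astype(float)
w = fmin_bfgs(logistic_cost, np.zeros(3), fprime=logistic_gradient,
              args=(X, Y, 1.0), disp=False)
print(w)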
def oneVsAll(self): _, n = self.X.shape labels = np.unique(self.y) all_theta = np.zeros((labels.shape[0], n)) y = np.copy(self.y) def objectiveFunc(theta): self.theta[:, 0] = np.copy(theta) [J, _] = self.computeCost() return J def gradFunc(theta): self.theta[:, 0] = np.copy(theta) [_, grad] = self.computeCost() return grad # def resFunc(theta): # self.theta[:, 0] = np.copy(theta) # hypo = self.X @ self.theta # prediction = self.sigmoid(hypo) # err = (self.y - prediction) # return err for label in labels: self.y = np.array([[1 if y[i] == label else 0] for i in range(len(y))]) theta = fmin_bfgs(objectiveFunc, self.theta, fprime=gradFunc, maxiter=100) # calculate theta with bfgs optimization function all_theta[label, :] = theta return all_theta
def refineDetector(grainList, scl=None, gtol=1.0e-6):
    """
    """
    if scl is None:
        scl = numpy.r_[0.005, 200., 1000.]

    # need to grab initial guess for xc, zTilt
    # use first grain by default (they all have the same parameters)
    xc = grainList[0].detectorGeom.xc
    zTilt = grainList[0].detectorGeom.zTilt
    chiTilt = grainList[0].detectorGeom.chiTilt
    if chiTilt is None:
        chiTilt = 0.
    x0 = scl * numpy.r_[xc, zTilt, chiTilt]

    # call to optimization routine
    xopt = optimize.fmin_bfgs(objFunc, x0, args=(grainList, scl), gtol=gtol)

    # recall objective to set detector geometries properly with solution
    objFunc(xopt, grainList, scl)

    return xopt / scl
def data2AB(data, x0=None): n = data.shape[0] T = data.shape[1] YY = np.dot(data[:, 1:], data[:, 1:].T) XX = np.dot(data[:, :-1], data[:, :-1].T) YX = np.dot(data[:, 1:], data[:, :-1].T) model = VAR(data.T) r = model.fit(1) A = r.coefs[0, :, :] #A = np.ones((n,n)) B = np.ones((n, n)) np.fill_diagonal(B, 0) B[np.triu_indices(n)] = 0 K = np.int(scipy.sum(abs(B))) #abs(A)+abs(B))) a_idx = np.where(A != 0) b_idx = np.where(B != 0) np.fill_diagonal(B, 1) try: s = x0.shape x = x0 except AttributeError: x = np.r_[A.flatten(), 0.1 * scipy.randn(K)] o = optimize.fmin_bfgs(nllf2, x, args=(np.double(A), np.double(B), YY, XX, YX, T, a_idx, b_idx), gtol=1e-12, maxiter=500, disp=False, full_output=True) ipdb.set_trace() A, B = x2M(o[0], np.double(A), np.double(B), a_idx, b_idx) B = B + B.T return A, B
def _solve_n3plus(self, C): """ For the n=3 case, find the optimum value for mu, given an interval count matrix C Args: C (numpy.array): Possible interval count matrix Returns: mu (n-tuple of floats): Optimum value for mu likelihood (float): The likelihood at the optimum mu vals (list of floats): """ global dLambda_dMu_numers dLambda_dMu_numers = [dLambda_dMu_numers[0]] + [[]] * (self.n) # Find a root for derivative functions C_w = weighted_C(C, self.rN) C_hat = normalize_C(C_w, self.m, self.n) start = [1.0 / self.n] * (self.n) + [1] val = optimize.fsolve(equations, start, args = (self.r,self.m,C_hat, self.n),\ fprime = jacobian) mu = val[:self.n] if not inRange(mu): #In the case that we find the wrong root (one of the values is negative), # directly minimize the function start = [1.0 / self.n] * (self.n - 1) mu = optimize.fmin_bfgs(L3_hat, start, fprime = dL3_hat, args = \ (C_hat, self.r, self.m, self.n), disp=0) mu = mu.tolist() mu.append(1 - sum(mu)) if not inRange(mu): #Case that a minimum doesn't exist return None answer = M3(C_w, mu, self.m, self.n) likelihood, vals = L3(answer, C_w, self.r, self.m, self.n) return (answer, likelihood, vals)