import sys

import numpy as np
import matplotlib.pyplot as plt


def gradient_descent(CF, theta, eps, opt=1, verbose=False):
    """
    Gradient descent
    Options:
      opt = 0: learning rate alpha is a constant
      opt = 1: learning rate alpha is optimized at each iteration
    Default is opt = 1
    """
    alpha = 0.05
    prev_cost = 1000
    cost = 0
    diff = np.abs(prev_cost - cost)
    min_cost = []
    while diff > eps:
        cost, delta = CF.cost_and_gradient(theta)
        min_cost.append(cost)
        # Regularized update; the bias term theta[0] is not shrunk
        theta[0] = theta[0] - alpha * delta[0]
        theta[1:] = (1 - alpha * CF.l * 1. / CF.m) * theta[1:] - alpha * delta[1:]
        if opt == 1:
            # Update alpha at each iteration (faster convergence):
            # exact line search along the gradient direction,
            # alpha = (g'g) / (g'Ag), with A the Hessian scaled by 1/m
            A = np.matrix(1. / CF.m * CF.compute_hessian(theta))
            g = np.matrix(delta[1:]).T
            alpha = float((g.T * g) / (g.T * (A * g)))
        diff = np.abs(prev_cost - cost)
        prev_cost = cost
    if verbose:
        if CF.n == 2:
            from plot_functions import plot_db
            plot_db(CF.x, CF.y, theta)
        if CF.n == 3:
            from plot_functions import plot_db_3d
            plot_db_3d(CF.x, CF.y, theta)
        plt.show()
    return theta, min_cost
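# --- Illustrative sketch (self-contained; nothing below comes from this
# module). Why the opt=1 update works: for a quadratic cost
# J(t) = 0.5*t'*A*t - b'*t with gradient g = A*t - b, the exact line search
# along -g gives alpha = (g'g) / (g'Ag), the same rule gradient_descent
# applies with the scaled Hessian in place of A.
def _demo_optimal_step_size():
    A = np.array([[3.0, 0.5],
                  [0.5, 1.0]])
    b = np.array([1.0, -2.0])
    t = np.zeros(2)
    for _ in range(20):
        g = np.dot(A, t) - b                            # gradient at t
        alpha = np.dot(g, g) / np.dot(g, np.dot(A, g))  # optimal step along -g
        t = t - alpha * g
    print t                      # close to the exact minimizer
    print np.linalg.solve(A, b)  # A^-1 b, for comparison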
def logistic_reg(x, y, theta, l=0, verbose=0, method='g'):
    """
    Determines the theta vector for a given polynomial degree and lambda
    x is a pandas DataFrame
    y is a pandas DataFrame
    l: regularization coefficient (default is 0, i.e. no regularization)
    Methods for cost function minimization (default is gradient descent):
      'g': gradient descent
      'cg': conjugate gradient
      'bfgs': BFGS (Broyden-Fletcher-Goldfarb-Shanno)
    """
    # Number of features
    n = x.shape[1]
    # Number of training set examples
    m = x.shape[0]
    # Number of classes
    K = len(y.columns)

    if len(theta[1]) != n + 1:
        print "In logistic_reg.py:\ndimension mismatch between the number of features and the number of parameters!"
        print "Number of features:", n
        print "Length of theta vector:", len(theta[1])
        sys.exit()

    for k in range(1, K + 1):
        theta[k] = np.array(theta[k], dtype=float)
        CF = CostFunction(x, y.values[:, k - 1], l)

        if verbose:
            if n == 1:
                from plot_functions import plot_hyp_func_1f
                syn, hyp = hypothesis(x.min(), x.max(), theta[k])
                plot_hyp_func_1f(x, y[k], syn, hyp, threshold=.5)
            if n == 2:
                from plot_functions import plot_db
                plot_db(x, y[k], theta[k], lim=3, title='Initial decision boundary')
            if n == 3:
                from plot_functions import plot_db_3d
                plot_db_3d(x, y[k], theta[k], lim=3, title='Initial decision boundary')

        stop = 10**-5
        if method == 'cg':
            # Conjugate gradient
            from scipy.optimize import fmin_cg
            theta[k], allvecs = fmin_cg(CF.compute_cost, theta[k],
                                        fprime=CF.compute_gradient,
                                        gtol=stop, disp=False, retall=True)
        elif method == 'bfgs':
            # BFGS (Broyden-Fletcher-Goldfarb-Shanno)
            from scipy.optimize import fmin_bfgs
            theta[k], allvecs = fmin_bfgs(CF.compute_cost, theta[k],
                                          fprime=CF.compute_gradient,
                                          gtol=stop, disp=False, retall=True)
        elif method == 'g':
            # Gradient descent
            theta[k], min_cost = gradient_descent(CF, theta[k], stop, opt=0)
            allvecs = None

        if verbose:
            if allvecs:
                min_cost = [CF.compute_cost(vec) for vec in allvecs]
                nb_iter = len(min_cost)
                #plot_cost_function_iter(nb_iter, min_cost)
                #plt.show()

    if verbose:
        if n == 1 and K == 1:
            from plot_functions import plot_hyp_func_1f
            syn, hyp = hypothesis(x.min(), x.max(), theta[1])
            plot_hyp_func_1f(x, y[1], syn, hyp, threshold=.5)
        if n == 2:
            if K != 1:
                from plot_functions import plot_multiclass_2d
                plot_multiclass_2d(x, theta)
            else:
                from plot_functions import plot_db
                plot_db(x, y, theta[1], title='Decision boundary')
        if n == 3:
            if K != 1:
                from plot_functions import plot_multiclass_3d
                plot_multiclass_3d(x, theta)
            else:
                from plot_functions import plot_db_3d
                plot_db_3d(x, y, theta[1], title='Decision boundary')

    return theta
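# --- Illustrative usage sketch (assumptions: theta is a dict keyed by class
# index 1..K with n + 1 parameters per class, y's columns are labelled 1..K,
# and CostFunction/hypothesis are defined alongside these functions, as the
# code above implies). Synthetic data; not part of the original module.
def _demo_logistic_reg():
    import pandas as pd
    np.random.seed(0)
    x = pd.DataFrame(np.random.randn(100, 2))
    y = pd.DataFrame({1: (x[0] + x[1] > 0).astype(int)})  # one class: K = 1
    theta = {1: np.zeros(x.shape[1] + 1)}                 # bias + 2 features
    theta = logistic_reg(x, y, theta, l=0.1, method='bfgs')
    print theta[1]  # fitted parameters for class 1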