import numpy as np

import k_helpers


def compute_gamma_linesearch(gamma_min, gamma_max, delta_max, cost_min,
                             cost_max, d, D, kernel_matrices, J_prev, y_mat,
                             alpha, C, goldensearch_precision_factor):
    """Golden-section line search for the step size gamma along direction D."""
    gold_ratio = (5 ** 0.5 + 1) / 2
    gamma_arr = np.array([gamma_min, gamma_max])
    cost_arr = np.array([cost_min, cost_max])
    coord = np.argmin(cost_arr)

    # Shrink the bracket [gamma_min, gamma_max] until it is smaller than the
    # requested precision (relative to the maximal admissible step).
    while ((gamma_max - gamma_min) > goldensearch_precision_factor * abs(delta_max)
           and gamma_max > np.finfo(float).eps):
        # Two interior points placed by the golden ratio.
        gamma_medr = gamma_min + (gamma_max - gamma_min) / gold_ratio
        gamma_medl = gamma_min + (gamma_medr - gamma_min) / gold_ratio

        tmp_d = d + gamma_medr * D
        alpha_r, cost_medr = compute_J_SVM(
            k_helpers.get_combined_kernel(kernel_matrices, tmp_d), y_mat, C)

        tmp_d = d + gamma_medl * D
        alpha_l, cost_medl = compute_J_SVM(
            k_helpers.get_combined_kernel(kernel_matrices, tmp_d), y_mat, C)

        cost_arr = np.array([cost_min, cost_medl, cost_medr, cost_max])
        gamma_arr = np.array([gamma_min, gamma_medl, gamma_medr, gamma_max])
        coord = np.argmin(cost_arr)

        # Keep the sub-bracket that contains the smallest cost.
        if coord == 0:
            gamma_max = gamma_medl
            cost_max = cost_medl
            alpha = alpha_l
        if coord == 1:
            gamma_max = gamma_medr
            cost_max = cost_medr
            alpha = alpha_r
        if coord == 2:
            gamma_min = gamma_medl
            cost_min = cost_medl
            alpha = alpha_l
        if coord == 3:
            gamma_min = gamma_medr
            cost_min = cost_medr
            alpha = alpha_r

    # Only accept the step if it improves on the previous objective value.
    if cost_arr[coord] < J_prev:
        return gamma_arr[coord], alpha, cost_arr[coord]
    else:
        return gamma_min, alpha, cost_min
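# For intuition, a self-contained golden-section search on a 1-D convex
# function, using the same bracketing rule as above. This is an illustrative
# sketch, not part of the repo; golden_section_min is a hypothetical name.
def golden_section_min(f, lo, hi, tol=1e-6):
    gold_ratio = (5 ** 0.5 + 1) / 2
    # Shrink [lo, hi] until the bracket is smaller than the tolerance.
    while (hi - lo) > tol:
        mid_r = lo + (hi - lo) / gold_ratio
        mid_l = lo + (mid_r - lo) / gold_ratio
        if f(mid_l) < f(mid_r):
            hi = mid_r  # the minimum lies in [lo, mid_r]
        else:
            lo = mid_l  # the minimum lies in [mid_l, hi]
    return (lo + hi) / 2

# Example: the minimum of (x - 2)^2 on [0, 5] is x = 2.
# print(golden_section_min(lambda x: (x - 2.0) ** 2, 0.0, 5.0))  # ~2.0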
def get_armijos_step_size(iteration, C, kernel_matrices, d, y_mat, alpha0,
                          gamma0, Jd, D, dJ, c=0.5, T=0.5):
    """Backtracking (Armijo) line search for the step size along direction D."""
    gamma = gamma0
    # m = D' * dJ is the local slope along D; it should be negative for a
    # descent direction.
    m = D.T.dot(dJ)
    # Shrink gamma until the sufficient-decrease condition holds:
    # J(d + gamma * D) <= J(d) + gamma * c * m
    while True:
        combined_kernel_matrix = k_helpers.get_combined_kernel(
            kernel_matrices, d + gamma * D)
        alpha, new_J = compute_J_SVM(combined_kernel_matrix, y_mat, C)
        if new_J <= Jd + gamma * c * m:
            return gamma
        gamma = gamma * T  # backtrack
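# Standalone illustration of the same Armijo rule on a generic objective f
# (a sketch only; armijo_step is a hypothetical name, not the repo's API).
def armijo_step(f, x, direction, grad, gamma0=1.0, c=0.5, T=0.5):
    gamma = gamma0
    m = direction.dot(grad)  # slope along the direction; negative for descent
    # Backtrack until f(x + gamma * direction) <= f(x) + gamma * c * m.
    while f(x + gamma * direction) > f(x) + gamma * c * m:
        gamma *= T
    return gamma

# Example: one gradient step on f(x) = ||x||^2 from x = (3, -4).
# x = np.array([3.0, -4.0]); grad = 2 * x
# gamma = armijo_step(lambda z: z.dot(z), x, -grad, grad)  # returns 0.5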
import numpy as np

import helpers
import k_helpers


def find_kernel_weights(d_init, kernel_matrices, C, y, verbose):
    ########################################## Initialization, starting from a point d
    weight_precision = 1e-08  # weights below this value are set to 0
    goldensearch_precision = 1e-01
    goldensearch_precision_init = 1e-01
    max_goldensearch_precision = 1e-08
    duality_gap_threshold = 0.01  # stopping criterion defined in the paper

    for m in kernel_matrices:
        assert m.shape == (y.shape[0], y.shape[0])
    M = len(kernel_matrices)  # how many kernels we have
    d = d_init  # initial guessed weight of each kernel, d_m = 1/M for M kernels
    y_mat = np.outer(y, y)  # label outer-product matrix for use in the SVM later
    iteration = 0
    stop_state = False  # loop flag

    ########################################## Algorithm 1 pseudocode from the SimpleMKL paper:
    # stop_state: checks the duality gap between primal and dual MKL in each loop
    # d: weight vector d = {d_1, d_2, ..., d_M} over the M kernels
    # dJ: gradient vector computed at the current d
    # D: reduced gradient descent direction computed from dJ and the equality constraint
    while not stop_state:  # loop until the minimizing d and corresponding alphas are found
        if verbose == 1:
            print("iteration:", iteration)
            print("d:", d)
        old_d = d.copy()

        ######################################### SVM computation to get J(d) at the current d
        # given the current d, build the combined kernel matrix
        combined_kernel_matrix = k_helpers.get_combined_kernel(kernel_matrices, d)
        # SVM wrapper: solve for the alphas and the value J(d) at this point
        alpha, J = helpers.compute_J_SVM(combined_kernel_matrix, y_mat, C)

        # gradient of J(d): an M-dimensional vector given the alpha values
        dJ = helpers.compute_dJ(kernel_matrices, y_mat, alpha)
        mu = np.argmax(d)  # mu is the index of the largest component of d
        # REDUCED gradient direction from the equality constraint on d
        D = helpers.compute_reduced_descent_direction(d, dJ, mu)
        if verbose == 1:
            print('current gradient:')
            print(dJ)
            print('current alpha, J: ', alpha, J)
            print('current reduced descent: ', D)

        J_cross = 0
        d_cross = d
        D_cross = D
        counter = 1
        J_prev = J
        # Efficient update of d without recomputing the gradient at each new point:
        # move d, D to d_cross, D_cross and track the corresponding J_cross value.
        while J_cross < J:
            d = d_cross  # update d
            D = D_cross  # update the reduced gradient
            if counter > 1:  # at the first pass J_cross = 0 and would corrupt J
                J = J_cross  # update the function value
            # maximum admissible step size for the current d and D
            gamma_max = helpers.compute_max_admissible_gamma(d, D)
            delta_max = gamma_max
            # d_cross is the point along the reduced gradient D at step gamma_max,
            # so one component of d_cross reaches zero
            d_cross = d + gamma_max * D
            # SVM solution at the new point d_cross
            combined_kernel_matrix_cross = k_helpers.get_combined_kernel(kernel_matrices, d_cross)
            alpha_cross, J_cross = helpers.compute_J_SVM(combined_kernel_matrix_cross, y_mat, C)
            if J_cross < J:  # only update D_cross while the function value keeps decreasing
                D_cross = helpers.update_reduced_descent_direction(d_cross, D, mu, weight_precision)
            counter = counter + 1
            if verbose == 1:
                print("updated cost: ", J_cross)
                print("d cross:")
                print(d_cross)
                print("counter:", counter)
                print("updated D_cross:")
                print(D_cross)

        # Now J(d_cross) > J(d) (note that d has already been updated several times).
        # Line-search along D (no further direction update) for the point that
        # minimizes J between d and d_cross.
        gamma, alpha, J = helpers.compute_gamma_linesearch(
            0, gamma_max, delta_max, J, J_cross, d, D, kernel_matrices,
            J_prev, y_mat, alpha, C, goldensearch_precision)
        d = d + gamma * D  # move to the new point; gamma may be zero, i.e. no update

        # numerical cleaning
        d = helpers.fix_weight_precision(d, weight_precision)

        # improve the line search by enhancing its precision once progress stalls
        if (max(abs(d - old_d)) < weight_precision
                and goldensearch_precision > max_goldensearch_precision):
            goldensearch_precision = goldensearch_precision / 10

        # gradient at the new point d with the corresponding alpha values
        dJ_curr_d = helpers.compute_dJ(kernel_matrices, y_mat, alpha)

        # stopping criterion: relative gap between primal J(d) and the dual value
        duality_gap = (J + np.max(-dJ_curr_d) - np.sum(alpha)) / J
        print('duality gap: ', duality_gap)
        if duality_gap < duality_gap_threshold:
            stop_state = True
        iteration += 1

    return (d, k_helpers.get_combined_kernel(kernel_matrices, d), J, alpha, duality_gap)
# Alternative implementation of find_kernel_weights: no verbose flag, the
# maximal step size is capped at 0.1, and the admissible step is rebuilt
# inside the inner descent loop.
def find_kernel_weights(k_init, kernel_matrices, C, y):
    # various parameters
    weight_precision = 1e-08  # weights below this value are set to 0
    goldensearch_precision = 1e-01
    goldensearch_precision_init = 1e-01
    max_goldensearch_precision = 1e-08
    duality_gap_threshold = 0.01

    for m in kernel_matrices:
        assert m.shape == (y.shape[0], y.shape[0])
    M = len(kernel_matrices)

    # initial weights of each kernel
    d = k_init
    # label outer-product matrix for use in the SVM later
    y_mat = np.outer(y, y)
    iteration = 0
    # initialization for the stopping criterion
    stop_state = False

    # initial alphas
    combined_kernel_matrix = k_helpers.get_combined_kernel(kernel_matrices, d)
    alpha, J = helpers.compute_J_SVM(combined_kernel_matrix, y_mat, C)

    while not stop_state:
        old_d = d.copy()
        dJ = helpers.compute_dJ(kernel_matrices, y_mat, alpha)
        mu = np.argmax(d)
        D = helpers.compute_descent_direction(d, dJ, mu)
        gamma_max = helpers.compute_max_admissible_gamma(d, D)
        delta_max = gamma_max
        if gamma_max > 0.1:  # cap the maximal step size
            gamma_max = 0.1

        J_cross = 0
        J_prev = J
        while J_cross < J:
            d_cross = d + gamma_max * D
            combined_kernel_matrix_cross = k_helpers.get_combined_kernel(kernel_matrices, d_cross)
            alpha_cross, J_cross = helpers.compute_J_SVM(combined_kernel_matrix_cross, y_mat, C)
            if J_cross < J:
                J = J_cross
                d = d_cross.copy()
                alpha = alpha_cross.copy()
                # update the descent direction
                D = helpers.update_descent_direction(d, D, mu, weight_precision)
                # rebuild the maximal admissible step along the new direction
                tmp_ind = np.where(D < 0)[0]
                if tmp_ind.shape[0] > 0:
                    gamma_max = np.min(-(np.divide(d[tmp_ind], D[tmp_ind])))
                    delta_max = gamma_max
                    J_cross = 0
                else:
                    gamma_max = 0
                    delta_max = 0

        # line-search between d and d + gamma_max * D
        gamma, alpha, J = helpers.compute_gamma_linesearch(
            0, gamma_max, delta_max, J, J_cross, d, D, kernel_matrices,
            J_prev, y_mat, alpha, C, goldensearch_precision)
        d = d + gamma * D

        # numerical cleaning
        d = helpers.fix_weight_precision(d, weight_precision)

        # improve the line search by enhancing its precision once progress stalls
        if (max(abs(d - old_d)) < weight_precision
                and goldensearch_precision > max_goldensearch_precision):
            goldensearch_precision = goldensearch_precision / 10

        dJ_curr_d = helpers.compute_dJ(kernel_matrices, y_mat, alpha)

        # stopping criterion: relative duality gap
        duality_gap = (J + np.max(-dJ_curr_d) - np.sum(alpha)) / J
        if duality_gap < duality_gap_threshold:
            stop_state = True
        iteration += 1

    return (d, k_helpers.get_combined_kernel(kernel_matrices, d), J, alpha, duality_gap)
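# The descent-direction helpers referenced above (compute_descent_direction /
# compute_reduced_descent_direction) are not shown in this section. For
# reference, a sketch of the standard SimpleMKL reduced gradient on the
# simplex {d >= 0, sum(d) = 1} (Rakotomamonjy et al., 2008); the repo's
# implementation may differ in details, and the name below is hypothetical.
def _reduced_descent_direction_sketch(d, dJ, mu):
    D = dJ[mu] - dJ  # move against the gradient, relative to component mu
    D[(d <= 0) & (D < 0)] = 0.0  # zero weights cannot decrease further
    D[mu] = -(np.sum(D) - D[mu])  # mu absorbs the rest so that sum(D) == 0
    return D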