def run_nesterov(X_train, y_train, X_validate, y_validate, group_feature_sizes, initial_lambda1=DEFAULT_LAMBDA):
    """Tune grouped-lasso regularization parameters by Nesterov-accelerated
    gradient descent on the validation error, with adaptive restarts.

    All lambdas (one per feature group plus one overall lambda) start at
    initial_lambda1.  Each iteration takes a gradient step on the lambdas,
    applies a Nesterov momentum extrapolation, re-solves the grouped-lasso
    problem, and scores the fit on the validation set.

    Returns (best_betas, cost_path): the coefficients with the lowest
    validation cost seen, and the list of validation costs tracked per
    iteration (starting with the initial fit's cost).
    """
    def _get_accelerated_lambdas(curr_lambdas, prev_lambdas, iter_num):
        # Nesterov momentum extrapolation: step beyond curr_lambdas in the
        # direction of the last change, with the classic (k-2)/(k+1) weight.
        # The np.maximum floor keeps each lambda from dropping below
        # min(curr_lambda, MIN_LAMBDA), so lambdas that are already tiny are
        # not pushed further down by the extrapolation.
        return np.maximum(
            curr_lambdas + (iter_num - 2) / (iter_num + 1.0) * (curr_lambdas - prev_lambdas),
            np.minimum(curr_lambdas, MIN_LAMBDA)
        )

    method_label = "HillclimbGroupedLasso Nesterov"
    # Regularization vector layout: one lambda per feature group, then the
    # single overall lambda appended at the end.
    curr_regularizations = np.concatenate((
        np.ones(len(group_feature_sizes)) * initial_lambda1,  # lambda1s
        [initial_lambda1]
    ))
    prev_regularizations = curr_regularizations
    acc_regularizations = curr_regularizations
    problem_wrapper = GroupedLassoProblemWrapper(X_train, y_train, group_feature_sizes)
    betas = problem_wrapper.solve(acc_regularizations)
    best_cost = testerror_grouped(X_validate, y_validate, betas)
    best_betas = betas

    # track progression
    cost_path = [best_cost]

    # Perform Nesterov with adaptive restarts.  The outer loop reruns the
    # inner pass until the inner loop breaks on its very first iteration
    # (i == 2), i.e. until no further progress is made right away; i_max
    # records how far the last inner pass got.
    i_max = 3
    while i_max > 2:
        print "restart! with i_max", i_max
        for i in range(2, NUMBER_OF_ITERATIONS + 1):
            i_max = i
            lambda_derivatives = _get_lambda_derivatives(X_train, y_train, X_validate, y_validate, betas, acc_regularizations)
            # Stop if the gradient is exactly zero in every component.
            if np.array_equal(lambda_derivatives, np.array([0] * lambda_derivatives.size)):
                print method_label, "derivatives zero. break."
                break
            # Plain gradient step, then momentum extrapolation on top of it.
            curr_regularizations = _get_updated_lambdas(acc_regularizations, STEP_SIZE, lambda_derivatives, use_boundary=True)
            acc_regularizations = _get_accelerated_lambdas(curr_regularizations, prev_regularizations, i)
            prev_regularizations = curr_regularizations

            potential_betas = problem_wrapper.solve(acc_regularizations)
            current_cost = testerror_grouped(X_validate, y_validate, potential_betas)
            is_decreasing_significantly = best_cost - current_cost > DECREASING_ENOUGH_THRESHOLD
            if current_cost < best_cost:
                best_cost = current_cost
                best_betas = potential_betas
            cost_path.append(current_cost)
            # The new iterate is accepted even if it did not improve on
            # best_cost; only best_betas is guaranteed to be the minimizer.
            betas = potential_betas
            if not is_decreasing_significantly:
                print method_label, "DECREASING TOO SLOW"
                break
            print method_label, "iter", i, "current cost", current_cost, "best cost", best_cost, "lambdas:", acc_regularizations
    print method_label, "best cost", best_cost, "best lambdas:", acc_regularizations
    return best_betas, cost_path
class SGL_Nelder_Mead(Nelder_Mead_Algo):
    """Nelder-Mead tuning of the grouped-lasso regularization parameters.

    The base algorithm proposes lambda vectors; this class scores each
    proposal by solving the grouped-lasso problem on the training split and
    measuring the error on the validation split.
    """

    method_label = "SGL_Nelder_Mead"
    # Cost returned for infeasible (non-positive) lambda proposals.
    MAX_COST = 100000

    def _create_problem_wrapper(self):
        # Built once from the training data; only the lambdas vary across
        # subsequent get_validation_cost calls.
        group_sizes = self.settings.get_expert_group_sizes()
        self.problem_wrapper = GroupedLassoProblemWrapper(
            self.data.X_train,
            self.data.y_train,
            group_sizes,
        )

    def get_validation_cost(self, lambdas):
        # Non-positive lambdas are infeasible: short-circuit with the max
        # cost instead of attempting a solve.
        if any(l <= 0 for l in lambdas):
            return self.MAX_COST

        model_params = self.problem_wrapper.solve(lambdas, quick_run=True)
        validation_cost = testerror_grouped(
            self.data.X_validate,
            self.data.y_validate,
            model_params,
        )
        self.log("validation_cost %f" % validation_cost)
        return validation_cost
# NOTE(review): SGL_Nelder_Mead is defined twice in this file with identical
# behavior; this later definition shadows the earlier one at import time.
# One of the two copies should be deleted.
class SGL_Nelder_Mead(Nelder_Mead_Algo):
    """Nelder-Mead tuning of the grouped-lasso regularization parameters.

    Scores each proposed lambda vector by solving the grouped-lasso problem
    on the training split and measuring the error on the validation split.
    """
    method_label = "SGL_Nelder_Mead"
    # Cost returned for infeasible (non-positive) lambda proposals.
    MAX_COST = 100000

    def _create_problem_wrapper(self):
        # Built once from the training data; only the lambdas vary across
        # subsequent get_validation_cost calls.
        self.problem_wrapper = GroupedLassoProblemWrapper(
            self.data.X_train,
            self.data.y_train,
            self.settings.get_expert_group_sizes())

    def get_validation_cost(self, lambdas):
        # if any are not positive, then just return max value
        for l in lambdas:
            if l <= 0:
                return self.MAX_COST

        model_params = self.problem_wrapper.solve(lambdas, quick_run=True)
        validation_cost = testerror_grouped(self.data.X_validate, self.data.y_validate, model_params)
        self.log("validation_cost %f" % validation_cost)
        return validation_cost
def run(X_train, y_train, X_validate, y_validate, group_feature_sizes, initial_lambda1=DEFAULT_LAMBDA): print "BACKTRACK_ALPHA", BACKTRACK_ALPHA method_step_size = STEP_SIZE method_label = HC_GROUPED_LASSO_LABEL curr_regularizations = np.concatenate(( np.ones(len(group_feature_sizes)) * initial_lambda1, [initial_lambda1] )) problem_wrapper = GroupedLassoProblemWrapper(X_train, y_train, group_feature_sizes) betas = problem_wrapper.solve(curr_regularizations) best_beta = betas best_cost = testerror_grouped(X_validate, y_validate, betas) # track progression cost_path = [best_cost] shrink_factor = 1 for i in range(1, NUMBER_OF_ITERATIONS): lambda_derivatives = _get_lambda_derivatives(X_train, y_train, X_validate, y_validate, betas, curr_regularizations) # numeric_derivs = _check_lambda_derivatives(problem_wrapper, X_validate, y_validate, curr_regularizations) # _compare_numeric_calculated_derivs(lambda_derivatives, numeric_derivs) if np.any(np.isnan(lambda_derivatives)): print "some value in df_dlambda is nan" break # do the gradient descent! 
pot_lambdas = _get_updated_lambdas(curr_regularizations, shrink_factor * method_step_size, lambda_derivatives) # get corresponding beta pot_betas = problem_wrapper.solve(pot_lambdas) try: pot_cost = testerror_grouped(X_validate, y_validate, pot_betas) except ValueError as e: print "value error", e pot_cost = 1e10 backtrack_check = best_cost - BACKTRACK_ALPHA * shrink_factor * method_step_size * np.linalg.norm(lambda_derivatives)**2 backtrack_check = best_cost if backtrack_check < 0 else backtrack_check while pot_cost > backtrack_check and shrink_factor > MIN_SHRINK: print "bactrack?", pot_cost, backtrack_check shrink_factor *= SHRINK_SHRINK backtrack_check = best_cost - BACKTRACK_ALPHA * shrink_factor * method_step_size * np.linalg.norm(lambda_derivatives)**2 print "backtrack_check?", backtrack_check backtrack_check = best_cost if backtrack_check < 0 else backtrack_check print "shrink!", shrink_factor pot_lambdas = _get_updated_lambdas(curr_regularizations, shrink_factor * method_step_size, lambda_derivatives) pot_betas = problem_wrapper.solve(pot_lambdas) try: pot_cost = testerror_grouped(X_validate, y_validate, pot_betas) except ValueError as e: print "value error", e pot_cost = 1e10 print "pot_cost", pot_cost is_decreasing_signficantly = best_cost - pot_cost > DECREASING_ENOUGH_THRESHOLD betas = pot_betas curr_regularizations = pot_lambdas if pot_cost < best_cost: best_cost = pot_cost best_beta = betas if not is_decreasing_signficantly: print "is_decreasing_signficantly NO!" break if shrink_factor <= MIN_SHRINK: print method_label, "shrink factor too small" break # track progression cost_path.append(pot_cost) print method_label, "iter", i, "best cost", best_cost #, "lambdas:", curr_regularizations print method_label, "best cost", best_cost, "lambdas:", curr_regularizations return best_beta, cost_path