def test_newton_fail():
    # f(x) = integral_0^x arctan(t) dt = x * arctan(x) - 0.5 * ln(x^2 + 1),
    # so f'(x) = arctan(x) and f''(x) = 1 / (x^2 + 1).
    class Oracle(oracles.BaseSmoothOracle):
        def func(self, x):
            return x * np.arctan(x) - 0.5 * np.log(np.power(x, 2) + 1)

        def grad(self, x):
            return np.arctan(x)

        def hess(self, x):
            return np.array([1 / (np.power(x, 2) + 1)])

    x0 = np.array([10.0])
    # Overflow warnings are expected while the iterates blow up.
    warnings.filterwarnings("ignore")
    [x_star, msg, history] = optimization.newton(
        Oracle(), x0, display=False, trace=False,
        line_search_options={'method': 'Constant', 'c': 1})
    warnings.filterwarnings("default")
    eq_(msg, 'computational_error')
    eq_(history, None)
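# Why test_newton_fail expects 'computational_error': with a constant unit step,
# Newton's update for f'(x) = arctan(x), f''(x) = 1 / (1 + x^2) is
#     x_{k+1} = x_k - (1 + x_k**2) * arctan(x_k),
# which overshoots and diverges whenever |x_k| is large (here x_0 = 10).
# A minimal standalone sketch of that blow-up (illustration only, not part of
# the test suite):
def _newton_arctan_divergence_demo(x=10.0, n_steps=5):
    for _ in range(n_steps):
        x = x - (1 + x ** 2) * np.arctan(x)
    # The magnitude roughly squares each step: ~1.4e2, ~3.0e4, ~1.4e9, ...
    return x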
def test_newton_1d():
    oracle = get_1d(0.5)
    x0 = np.array([1.0])
    FUNC = [
        np.array([2.14872127]),
        np.array([0.9068072]),
        np.array([0.89869455]),
        np.array([0.89869434]),
    ]
    GRAD_NORM = [
        1.8243606353500641,
        0.14023069594489929,
        0.00070465169721295462,
        1.7464279966628027e-08,
    ]
    TIME = [0] * 4  # Dummy values.
    X = [
        np.array([1.]),
        np.array([-0.29187513]),
        np.array([-0.40719141]),
        np.array([-0.40777669]),
    ]
    TRUE_HISTORY = {'func': FUNC, 'grad_norm': GRAD_NORM, 'time': TIME, 'x': X}

    # Constant step size.
    [x_star, msg, history] = optimization.newton(
        oracle, x0, max_iter=5, tolerance=1e-10, trace=True,
        line_search_options={'method': 'Constant', 'c': 1.0})
    ok_(np.allclose(x_star, [-0.4077777], atol=1e-4))
    eq_(msg, 'success')
    check_equal_histories(history, TRUE_HISTORY)
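# The reference history above is verified with check_equal_histories, whose
# definition is not part of this excerpt. A plausible sketch of such a helper,
# assuming it compares every field except the (non-reproducible) timings;
# the actual helper may differ:
def _check_equal_histories_sketch(history, reference, atol=1e-3):
    if history is None or reference is None:
        eq_(history, reference)
        return
    eq_(sorted(history.keys()), sorted(reference.keys()))
    for key in reference:
        if key == 'time':
            eq_(len(history[key]), len(reference[key]))  # lengths only
        else:
            ok_(np.allclose(history[key], reference[key], atol=atol))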
def plot_results(dataset):
    """
    Plots the function value and the squared gradient norm against time
    for the gradient descent and Newton optimization methods.

    :param dataset: one of 'w8a', 'gissete', 'real-sim'
    :return: None
    """
    available_datasets = ['w8a', 'gissete', 'real-sim']
    if dataset not in available_datasets:
        raise ValueError(
            "Dataset {0} is currently not supported. Available datasets are: {1}"
            .format(dataset, ' '.join(available_datasets)))

    A, b = load_svmlight_file('./data/{}'.format(dataset))
    oracle = oracles.create_log_reg_oracle(A, b, 1 / len(b))
    x_init = np.zeros(A.shape[1])

    [_, _, history_grad] = optimization.gradient_descent(
        oracle, x_init, trace=True,
        line_search_options={'method': 'Wolfe', 'c': 1})
    [_, _, history_newton] = optimization.newton(
        oracle, x_init, trace=True,
        line_search_options={'method': 'Wolfe', 'c': 1})

    plot_function_values_on_time(dataset, history_grad, history_newton)
    plot_grad_norm_values_on_time(dataset, history_grad, history_newton)
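# plot_results delegates the drawing to plot_function_values_on_time and
# plot_grad_norm_values_on_time, which are not shown in this excerpt.
# A minimal sketch of what the first helper might look like, inferred from the
# call site (the name suffix, axis labels, and output path are assumptions):
def _plot_function_values_on_time_sketch(dataset, history_grad, history_newton):
    import matplotlib.pyplot as plt

    plt.figure()
    plt.plot(history_grad['time'], history_grad['func'], label='Gradient descent')
    plt.plot(history_newton['time'], history_newton['func'], label='Newton')
    plt.xlabel('Time, seconds')
    plt.ylabel('Function value')
    plt.title(dataset)
    plt.legend()
    plt.grid()
    plt.savefig('func_vs_time_{}.png'.format(dataset))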
def experiment_4():
    path = 'data'
    np.random.seed(31415)
    datasets = ["w8a", "gisette_scale", "real-sim"]
    for dataset in datasets:
        A, b = load_svmlight_file(path + '/' + dataset)
        m = b.size
        oracle = create_log_reg_oracle(A, b, 1.0 / m, "optimized")
        x_0 = np.zeros((A.shape[1], ))

        x_opt1, message, history1 = gradient_descent(oracle, x_0, trace=True)
        if dataset != 'real-sim':
            # Newton is run on every dataset except real-sim.
            x_opt2, message, history2 = newton(oracle, x_0, trace=True)
            print(len(history2['time']), history2['time'][-1])

        plt.figure()
        plt.plot(history1['time'], history1['func'], label='GD')
        if dataset != 'real-sim':
            plt.plot(history2['time'], history2['func'], label='Newton')
        plt.xlabel('Time since the start of the experiment, seconds')
        plt.ylabel('Loss function value')
        plt.legend()
        plt.grid()
        plt.savefig("pics/logreg_loss_value_vs_time_" + dataset)

        plt.figure()
        plt.plot(history1['time'],
                 2 * np.log(history1['grad_norm'] / history1['grad_norm'][0]),
                 label='GD')
        if dataset != 'real-sim':
            plt.plot(history2['time'],
                     2 * np.log(history2['grad_norm'] / history2['grad_norm'][0]),
                     label='Newton')
        plt.xlabel('Time since the start of the experiment, seconds')
        plt.ylabel('Log of the relative squared gradient norm')
        plt.legend()
        plt.grid()
        plt.savefig("pics/logreg_grad_norm_vs_time_" + dataset)
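# experiment_4 relies on module-level imports that are not shown in this
# excerpt. A likely set, judging from the names it uses (the exact module
# layout is an assumption):
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_svmlight_file
from oracles import create_log_reg_oracle
from optimization import gradient_descent, newton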
def third_experiment():
    data_path = 'experiment_3/datasets/'
    result_path = 'experiment_3/'
    names = ['w8a', 'gisette_scale', 'real-sim']

    def plotting(history_gd, history_nm, param):
        f_gd = np.array(history_gd[param])
        f_nm = np.array(history_nm[param])
        time_gd = list(map(lambda i: i.total_seconds(), history_gd['time']))
        time_nm = list(map(lambda i: i.total_seconds(), history_nm['time']))
        if param == 'grad_norm':
            f_gd = np.log(f_gd / f_gd[0])
            f_nm = np.log(f_nm / f_nm[0])
        plt.figure()
        plt.plot(time_gd, f_gd, label='GD')
        plt.plot(time_nm, f_nm, label='Newton')
        plt.xlabel('seconds')
        ylabel = 'func' if param == 'func' else r'$\log\left(grad\_norm\right)$'
        plt.ylabel(ylabel)
        plt.legend()
        plt.grid()
        plt.savefig(result_path + name + '-' + param)

    for name in names:
        A, b = load_svmlight_file(data_path + name)
        m, n = A.shape
        oracle = oracles.create_log_reg_oracle(A, b, 1 / m)
        if name != 'real-sim':
            print('begin')
            x_star_nm, _, history_nm = optimization.newton(
                oracle, np.zeros(n), trace=True)
            print('Newton is finished')
            x_star_gd, _, history_gd = optimization.gradient_descent(
                oracle, np.zeros(n), trace=True)
            print('GD is finished')
            plotting(history_gd, history_nm, 'func')
            plotting(history_gd, history_nm, 'grad_norm')
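# Note: plotting() above calls .total_seconds() on every history['time'] entry,
# so the optimizer used here apparently records datetime.timedelta objects
# rather than plain floats. A hypothetical sketch of how such a trace could be
# produced inside an optimizer loop (assumed, not taken from optimization.py):
def _timedelta_history_sketch(n_iters=3):
    from datetime import datetime

    start = datetime.now()
    history = {'time': []}
    for _ in range(n_iters):
        # ... one optimization step would go here ...
        history['time'].append(datetime.now() - start)  # timedelta per iteration
    # Converting back to seconds, as plotting() does:
    return [t.total_seconds() for t in history['time']]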
def experiment_5_and_6(algo='gd'):
    np.random.seed(31415)
    m, n = 2000, 1000
    A = np.random.randn(m, n)
    b = np.sign(np.random.randn(m))
    regcoef = 1 / m
    logreg_oracle = create_log_reg_oracle(A, b, regcoef, oracle_type='optimized')

    line_search_options = [
        {'method': 'Constant', 'c': 1.0},
        {'method': 'Constant', 'c': 0.95},
        {'method': 'Constant', 'c': 0.9},
        {'method': 'Constant', 'c': 0.85},
        {'method': 'Armijo', 'c1': 1e-8},
        {'method': 'Armijo', 'c1': 1e-6},
        {'method': 'Armijo', 'c1': 1e-4},
        {'method': 'Armijo', 'c1': 1e-1},
        {'method': 'Wolfe', 'c2': 1.5},
        {'method': 'Wolfe', 'c2': 0.9},
        {'method': 'Wolfe', 'c2': 0.1},
        {'method': 'Wolfe', 'c2': 0.01},
    ]
    colors = ['#e66101', '#fdb863', '#b2abd2', '#5e3c99']
    styles = {
        'Constant': {'linestyle': '--', 'dashes': (2, 5), 'linewidth': 2},
        'Armijo': {'linestyle': '--', 'dashes': (5, 2)},
        'Wolfe': {'linestyle': 'solid'},
    }

    x_0_list = [None] * 3
    x_0_list[0] = np.zeros((n, ))
    x_0_list[1] = np.random.uniform(-1, 1, (n, ))
    x_0_list[2] = np.ones((n, ))

    for k, x_0 in enumerate(x_0_list):
        plt.figure(figsize=(12, 9))
        for i, options in tqdm(enumerate(line_search_options)):
            # Case-insensitive check so the default algo='gd' selects gradient descent.
            if algo.lower() == 'gd':
                x_opt, message, history = gradient_descent(
                    logreg_oracle, x_0, trace=True, line_search_options=options)
            else:
                x_opt, message, history = newton(
                    logreg_oracle, x_0, trace=True, line_search_options=options)
            args = list(options.keys()) + list(options.values())
            label = "{} ({}={}, {}={})".format(algo, args[0], args[2],
                                               args[1], args[3])
            values = 2 * np.log(history['grad_norm'] / history['grad_norm'][0])
            method = args[2]
            # Small vertical jitter keeps overlapping curves distinguishable.
            plt.plot(values + np.random.randn(values.size) * 0.05,
                     color=colors[i % len(colors)], label=label, alpha=0.7,
                     **styles[method])
        plt.xlabel('Iteration number')
        plt.ylabel('Log of the relative squared gradient norm')
        plt.legend(loc='upper right')
        plt.grid()
        Path("pics/logreg_{}_linear_search_strategies".format(algo)).mkdir(
            parents=True, exist_ok=True)
        plt.savefig("pics/logreg_{}_linear_search_strategies/x_0_{}.png".format(
            algo, k))

    np.random.seed(31415)
    n = 2000
    C = ortho_group.rvs(n)
    A = C.T @ np.diag(np.random.uniform(1, 20, (n, ))) @ C
    b = np.random.randn(n)
    x_0 = np.zeros((n, ))
    quadratic_oracle = QuadraticOracle(A, b)
    x_opt = np.linalg.solve(A, b)
    f_opt = quadratic_oracle.func(x_opt)

    line_search_options = [
        {'method': 'Constant', 'c': 0.09},
        {'method': 'Constant', 'c': 0.085},
        {'method': 'Constant', 'c': 0.08},
        {'method': 'Constant', 'c': 0.075},
        {'method': 'Armijo', 'c1': 1e-10},
        {'method': 'Armijo', 'c1': 1e-7},
        {'method': 'Armijo', 'c1': 1e-4},
        {'method': 'Armijo', 'c1': 1e-1},
        {'method': 'Wolfe', 'c2': 1.5},
        {'method': 'Wolfe', 'c2': 0.9},
        {'method': 'Wolfe', 'c2': 0.1},
        {'method': 'Wolfe', 'c2': 0.01},
    ]

    x_0_list = [None] * 3
    x_0_list[0] = np.zeros((n, ))
    x_0_list[1] = np.random.uniform(-1, 1, (n, ))
    x_0_list[2] = x_opt + np.random.randn(n) * 0.2

    for k, x_0 in enumerate(x_0_list):
        plt.figure(figsize=(12, 9))
        for i, options in tqdm(enumerate(line_search_options)):
            if algo.lower() == 'gd':
                x_opt, message, history = gradient_descent(
                    quadratic_oracle, x_0, trace=True, line_search_options=options)
            else:
                x_opt, message, history = newton(
                    quadratic_oracle, x_0, trace=True, line_search_options=options)
            args = list(options.keys()) + list(options.values())
            label = "{} ({}={}, {}={})".format(algo, args[0], args[2],
                                               args[1], args[3])
            values = np.log(np.abs((history['func'] - f_opt) / f_opt) + 1e-16)
            method = args[2]
            plt.plot(values + np.random.randn(values.size) * 0.05,
                     color=colors[i % len(colors)], label=label, alpha=0.7,
                     **styles[method])
        plt.xlabel('Iteration number')
        plt.ylabel('Log of the relative residual')
        plt.legend(loc='upper right')
        plt.grid()
        Path("pics/quadratic_{}_linear_search_strategies".format(algo)).mkdir(
            parents=True, exist_ok=True)
        plt.savefig("pics/quadratic_{}_linear_search_strategies/x_0_{}.png".format(
            algo, k))
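# A hypothetical driver for the function above (the real entry point is not
# part of this excerpt); with the case-insensitive check, either spelling of
# the algorithm name works:
if __name__ == '__main__':
    experiment_5_and_6(algo='GD')      # gradient descent variants
    experiment_5_and_6(algo='Newton')  # Newton variants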