import sys
from io import StringIO

import numpy as np

# _gradient_descent lives in sklearn.manifold._t_sne in recent scikit-learn
# releases (sklearn.manifold.t_sne in older ones).
from sklearn.manifold._t_sne import _gradient_descent


def test_gradient_descent_stops():
    # Test the three stopping conditions of gradient descent.
    class ObjectiveSmallGradient:
        def __init__(self):
            self.it = -1

        def __call__(self, _, compute_error=True):
            self.it += 1
            return (10 - self.it) / 10.0, np.array([1e-5])

    def flat_function(_, compute_error=True):
        return 0.0, np.ones(1)

    # Stopping condition 1: the gradient norm drops to min_grad_norm.
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        _, error, it = _gradient_descent(
            ObjectiveSmallGradient(), np.zeros(1), 0, n_iter=100,
            n_iter_without_progress=100, momentum=0.0, learning_rate=0.0,
            min_gain=0.0, min_grad_norm=1e-5, verbose=2)
    finally:
        out = sys.stdout.getvalue()
        sys.stdout.close()
        sys.stdout = old_stdout
    assert error == 1.0
    assert it == 0
    assert "gradient norm" in out

    # Stopping condition 2: too many iterations without improvement. The
    # flat objective never improves after iteration 0, so with a patience
    # of 10 the descent stops at iteration 11.
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        _, error, it = _gradient_descent(
            flat_function, np.zeros(1), 0, n_iter=100,
            n_iter_without_progress=10, momentum=0.0, learning_rate=0.0,
            min_gain=0.0, min_grad_norm=0.0, verbose=2)
    finally:
        out = sys.stdout.getvalue()
        sys.stdout.close()
        sys.stdout = old_stdout
    assert error == 0.0
    assert it == 11
    assert "did not make any progress" in out

    # Stopping condition 3: the maximum number of iterations. n_iter=11
    # caps the loop at iteration index 10.
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        _, error, it = _gradient_descent(
            ObjectiveSmallGradient(), np.zeros(1), 0, n_iter=11,
            n_iter_without_progress=100, momentum=0.0, learning_rate=0.0,
            min_gain=0.0, min_grad_norm=0.0, verbose=2)
    finally:
        out = sys.stdout.getvalue()
        sys.stdout.close()
        sys.stdout = old_stdout
    assert error == 0.0
    assert it == 10
    assert "Iteration 10" in out
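
# A minimal sketch of the stopping logic the test above exercises, assuming a
# simplified optimizer; this is NOT scikit-learn's actual implementation. The
# name gradient_descent_sketch is hypothetical; only the three exit paths and
# their bookkeeping are taken from the behavior asserted above.
def gradient_descent_sketch(objective, p0, it, n_iter,
                            n_iter_without_progress=300, learning_rate=200.0,
                            min_grad_norm=1e-7):
    p = p0.copy()
    error, best_error, best_iter = np.inf, np.inf, it
    i = it
    for i in range(it, n_iter):                        # exit 3: iteration cap
        error, grad = objective(p, compute_error=True)
        p -= learning_rate * grad
        if error < best_error:
            best_error, best_iter = error, i
        elif i - best_iter > n_iter_without_progress:  # exit 2: no progress
            break
        if np.linalg.norm(grad) <= min_grad_norm:      # exit 1: tiny gradient
            break
    return p, error, i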
def jumped_gradient_descent(self, obj_func, params, **opt_args):
    """Run _gradient_descent in fixed-size jumps, snapshotting the embedding
    after each jump and stopping early once the KL divergence stops
    improving."""
    it = opt_args['it'] - 1
    # Translate the iteration-level patience into a jump-level patience.
    n_jump_without_progress = (
        opt_args["n_iter_without_progress"] // self.jump_size + 1)
    remaining = opt_args['n_iter'] - it
    n_jumps = remaining // self.jump_size + 1
    ct_no_improvement = 0
    kl_divergence_best = None
    new_opt_args = opt_args.copy()
    for jump in range(n_jumps):
        # Each jump resumes where the previous one stopped and runs for at
        # most jump_size iterations.
        new_opt_args['it'] = it + 1
        new_opt_args['n_iter'] = it + self.jump_size
        params, kl_divergence, it = _gradient_descent(
            obj_func, params, **new_opt_args)
        # Snapshot the intermediate embedding and the momentum in effect.
        self.X_embedded_jumps.append(params.reshape(-1, self.n_components))
        self.momentum_jumps.append(opt_args['momentum'])
        if jump > 0:
            print(
                "\rJump {}/{}: best_kl={:.6f}\t current_kl={:.6f}".format(
                    jump + 1, n_jumps, kl_divergence_best, kl_divergence),
                end="", flush=True)
        if kl_divergence_best is None or kl_divergence < kl_divergence_best:
            kl_divergence_best = kl_divergence
            ct_no_improvement = 0
        else:
            ct_no_improvement += 1
            if ct_no_improvement >= n_jump_without_progress:
                break
    print()
    return params, kl_divergence, it
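
# A minimal usage sketch with hypothetical names: jumped_gradient_descent
# reads jump_size, n_components, X_embedded_jumps, and momentum_jumps from its
# host object, so a small stand-in class is enough to drive it. The JumpedTSNE
# class and the toy quadratic objective (standing in for t-SNE's KL objective)
# are illustrative assumptions, not part of the code above.
class JumpedTSNE:
    def __init__(self, n_components=2, jump_size=25):
        self.n_components = n_components
        self.jump_size = jump_size
        self.X_embedded_jumps = []  # embedding snapshot after each jump
        self.momentum_jumps = []    # momentum in effect for each jump


def quadratic_objective(params, compute_error=True):
    # f(p) = ||p||^2 / 2 with gradient p; a stand-in for the KL objective.
    return 0.5 * float(np.dot(params, params)), params.copy()


model = JumpedTSNE(n_components=2, jump_size=25)
p0 = np.random.RandomState(0).randn(10 * model.n_components)
opt_args = dict(it=0, n_iter=250, n_iter_without_progress=100,
                momentum=0.5, learning_rate=0.1, min_gain=0.01,
                min_grad_norm=1e-7, verbose=0)
# Called as a plain function here; bound as a method it would be
# model.jumped_gradient_descent(quadratic_objective, p0, **opt_args).
params, kl, it = jumped_gradient_descent(
    model, quadratic_objective, p0, **opt_args)
print("{} snapshots recorded, final kl={:.6f}".format(
    len(model.X_embedded_jumps), kl))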