def test_mul():
    """Check values and Jacobians of elementwise products of autodiff vectors."""
    # Two vectors from create_vector with distinct base names.
    lhs = ad.create_vector('v', [1, 2])
    rhs = ad.create_vector('w', [3, 5])
    product = lhs * rhs
    assert product[0].getValue() == 3
    assert product[1].getValue() == 10
    expected = np.array([[3, 0, 1, 0], [0, 5, 0, 2]])
    assert np.array_equal(
        ad.get_jacobian(product, ['v1', 'v2', 'w1', 'w2']), expected)

    # Arrays of Scalars, with one variable shared by both entries of the
    # right-hand operand.
    x = ad.Scalar('x', 1)
    y = ad.Scalar('y', 2)
    v = ad.Scalar('v', 3)
    lhs = np.array([x, y])
    rhs = np.array([v, 3 * v])
    product = lhs * rhs
    assert product[0].getValue() == 3
    assert product[1].getValue() == 18
    expected = np.array([[3, 0, 1], [0, 9, 6]])
    assert np.array_equal(ad.get_jacobian(product, ['x', 'y', 'v']), expected)

    # A vector multiplied by itself.
    vec = ad.create_vector('v', [2, 3])
    product = vec * vec
    assert product[0].getValue() == 4
    assert product[1].getValue() == 9
    expected = np.array([[4, 0], [0, 6]])
    assert np.array_equal(ad.get_jacobian(product, ['v1', 'v2']), expected)

    # A vector scaled by a plain number.
    vec = ad.create_vector('v', [1, 2])
    scaled = vec * 10
    assert scaled[0].getValue() == 10
    assert scaled[1].getValue() == 20
    expected = np.array([[10, 0], [0, 10]])
    assert np.array_equal(ad.get_jacobian(scaled, ['v1', 'v2']), expected)

    # Operand entries that are themselves expressions in x and y.
    x = ad.Scalar('x', 5)
    y = ad.Scalar('y', 2)
    lhs = np.array([x, y])
    rhs = np.array([x * y, (x + y)])
    product = lhs * rhs
    assert product[0].getValue() == 50
    assert product[1].getValue() == 14
    expected = np.array([[20, 25], [2, 9]])
    assert np.array_equal(ad.get_jacobian(product, ['x', 'y']), expected)

    # Right-hand operand mixes a Scalar with a plain number.
    x = ad.Scalar('x', 1)
    y = ad.Scalar('y', 2)
    lhs = np.array([x, y])
    rhs = np.array([y, 10])
    product = lhs * rhs
    assert product[0].getValue() == 2
    assert product[1].getValue() == 20
    expected = np.array([[2, 1], [0, 10]])
    assert np.array_equal(ad.get_jacobian(product, ['x', 'y']), expected)
def test_add():
    """Check values and Jacobians of elementwise sums of autodiff vectors."""
    # Two vectors sharing the base name 'v': derivatives for the same
    # variable name accumulate.
    first = ad.create_vector('v', [1, 2])
    second = ad.create_vector('v', [1, 5])
    total = first + second
    assert total[0].getValue() == 2
    assert total[1].getValue() == 7
    expected = np.array([[2, 0], [0, 2]])
    assert np.array_equal(ad.get_jacobian(total, ['v1', 'v2']), expected)

    # Vector plus a plain number.
    vec = ad.create_vector('v', [1, 2])
    shifted = vec + 10
    assert shifted[0].getValue() == 11
    assert shifted[1].getValue() == 12
    expected = np.array([[1, 0], [0, 1]])
    assert np.array_equal(ad.get_jacobian(shifted, ['v1', 'v2']), expected)

    # Scalars deliberately named like the vector's elements, added in
    # swapped order so every output depends on both 'v1' and 'v2'.
    vec = ad.create_vector('v', [1, 2])
    named_v2 = ad.Scalar('v2', 4)
    named_v1 = ad.Scalar('v1', 7)
    total = vec + np.array([named_v2, named_v1])
    assert total[0].getValue() == 5
    assert total[1].getValue() == 9
    expected = np.array([[1, 1], [1, 1]])
    assert np.array_equal(ad.get_jacobian(total, ['v1', 'v2']), expected)

    # Array of Scalars plus a create_vector vector; Jacobian restricted to
    # x and y.
    x = ad.Scalar('x', 1)
    y = ad.Scalar('y', 2)
    lhs = np.array([x, y])
    rhs = ad.create_vector('v', [1, 5])
    total = lhs + rhs
    assert total[0].getValue() == 2
    assert total[1].getValue() == 7
    expected = np.array([[1, 0], [0, 1]])
    assert np.array_equal(ad.get_jacobian(total, ['x', 'y']), expected)

    # Operand entries that are expressions in x and y.
    x = ad.Scalar('x', 1)
    y = ad.Scalar('y', 2)
    lhs = np.array([x, y])
    rhs = np.array([x + y, x])
    total = lhs + rhs
    assert total[0].getValue() == 4
    assert total[1].getValue() == 3
    expected = np.array([[2, 1], [1, 1]])
    assert np.array_equal(ad.get_jacobian(total, ['x', 'y']), expected)

    # Right-hand operand mixes a Scalar with a plain number.
    x = ad.Scalar('x', 1)
    y = ad.Scalar('y', 2)
    lhs = np.array([x, y])
    rhs = np.array([y, 10])
    total = lhs + rhs
    assert total[0].getValue() == 3
    assert total[1].getValue() == 12
    expected = np.array([[1, 1], [0, 1]])
    assert np.array_equal(ad.get_jacobian(total, ['x', 'y']), expected)
def test_create_vector():
    """Check create_vector, get_value, get_deriv and get_jacobian on vectors."""
    # Default seeds: each element has derivative 1 w.r.t. its own name.
    vec = ad.create_vector('v', [1, 2])
    assert vec[0].getValue() == 1
    assert vec[1].getValue() == 2
    partials = ad.get_deriv(vec)
    wrt_v1 = np.array([partial.get('v1', 0) for partial in partials])
    wrt_v2 = np.array([partial.get('v2', 0) for partial in partials])
    assert np.array_equal(wrt_v1, np.array([1, 0]))
    assert np.array_equal(wrt_v2, np.array([0, 1]))
    assert np.array_equal(
        ad.get_jacobian(vec, ['v1', 'v2']), np.array([[1, 0], [0, 1]]))
    # A name that no element depends on yields a zero column.
    assert np.array_equal(
        ad.get_jacobian(vec, ['v1', 'v2', 'hello']),
        np.array([[1, 0, 0], [0, 1, 0]]))

    # Explicit seeds passed as the third argument.
    vec = ad.create_vector('v', [1, 2], [3, 4])
    assert vec[0].getValue() == 1
    assert vec[1].getValue() == 2
    partials = ad.get_deriv(vec)
    wrt_v1 = np.array([partial.get('v1', 0) for partial in partials])
    wrt_v2 = np.array([partial.get('v2', 0) for partial in partials])
    assert np.array_equal(wrt_v1, np.array([3, 0]))
    assert np.array_equal(wrt_v2, np.array([0, 4]))
    assert np.array_equal(
        ad.get_jacobian(vec, ['v1', 'v2']), np.array([[3, 0], [0, 4]]))
    assert np.array_equal(
        ad.get_jacobian(vec, ['v1', 'v2', 'hello']),
        np.array([[3, 0, 0], [0, 4, 0]]))

    # Seed list longer than the value list must be rejected.
    with pytest.raises(Exception):
        ad.create_vector('v', [1, 2], [3, 4, 5])

    # A vector assembled by hand from Scalars.
    x = ad.Scalar('x', 1)
    y = ad.Scalar('y', 2)
    vec = np.array([x, y])
    assert np.array_equal(ad.get_value(vec), np.array([1, 2]))
    assert np.array_equal(
        ad.get_jacobian(vec, ['v1', 'v2']), np.array([[0, 0], [0, 0]]))
    assert np.array_equal(
        ad.get_jacobian(vec, ['x', 'y']), np.array([[1, 0], [0, 1]]))

    # Column order of the Jacobian follows the order of the requested names.
    x = ad.Scalar('x', 1)
    y = ad.Scalar('y', 2)
    vec = np.array([x, 2 * y])
    assert np.array_equal(ad.get_value(vec), np.array([1, 4]))
    assert np.array_equal(
        ad.get_jacobian(vec, ['x', 'y']), np.array([[1, 0], [0, 2]]))
    assert np.array_equal(
        ad.get_jacobian(vec, ['y', 'x']), np.array([[0, 1], [2, 0]]))

    # First entry depends on both variables.
    x = ad.Scalar('x', 1)
    y = ad.Scalar('y', 2)
    vec = np.array([x + y, 2 * y])
    assert np.array_equal(ad.get_value(vec), np.array([3, 4]))
    assert np.array_equal(
        ad.get_jacobian(vec, ['x', 'y']), np.array([[1, 1], [0, 2]]))
    assert np.array_equal(
        ad.get_jacobian(vec, ['y', 'x']), np.array([[1, 1], [2, 0]]))
def test_tan():
    """Check ad.tan on a vector: values and the diagonal sec^2 Jacobian."""
    angles = ad.create_vector('v', [0, 100])
    tangent = ad.tan(angles)
    assert tangent[0].getValue() == 0
    assert np.isclose(tangent[1].getValue(), np.tan(100))

    # d/dx tan(x) = 1 / cos(x)^2, so the Jacobian is diagonal.
    expected = np.array([[1, 0], [0, 1 / (np.cos(100) ** 2)]])
    jacobian = ad.get_jacobian(tangent, ['v1', 'v2'])
    assert np.isclose(jacobian, expected).all()
def test_sin():
    """Check ad.sin on vectors, alone and composed with division and powers."""
    # Plain sine of a create_vector vector.
    angles = ad.create_vector('v', [0, 100])
    sine = ad.sin(angles)
    assert sine[0].getValue() == 0
    assert np.isclose(sine[1].getValue(), np.sin(100))
    expected = np.array([[1, 0], [0, np.cos(100)]])
    assert np.array_equal(ad.get_jacobian(sine, ['v1', 'v2']), expected)

    # sin(v) / sin(v) is identically 1, so every derivative vanishes.
    x = ad.Scalar('x', 4)
    y = ad.Scalar('y', 10)
    ratio = ad.sin(np.array([x, y])) / ad.sin(np.array([x, y]))
    assert np.isclose(ratio[0].getValue(), 1)
    assert np.isclose(ratio[1].getValue(), 1)
    expected = np.array([[0, 0], [0, 0]])
    assert np.isclose(ad.get_jacobian(ratio, ['x', 'y']), expected).all()

    # sin(v)^2: derivative is 2 sin(v) cos(v) on the diagonal.
    x = ad.Scalar('x', 4)
    y = ad.Scalar('y', 10)
    squared = ad.sin(np.array([x, y])) ** 2
    assert np.isclose(squared[0].getValue(), np.sin(4) ** 2)
    assert np.isclose(squared[1].getValue(), np.sin(10) ** 2)
    expected = np.array([
        [2 * np.sin(4) * np.cos(4), 0],
        [0, 2 * np.sin(10) * np.cos(10)],
    ])
    assert np.isclose(ad.get_jacobian(squared, ['x', 'y']), expected).all()

    # sin^2 of expressions in x and y: chain rule through x*y and x+y.
    x = ad.Scalar('x', 4)
    y = ad.Scalar('y', 10)
    squared = ad.sin(np.array([x * y, x + y])) ** 2
    assert np.isclose(squared[0].getValue(), np.sin(40) ** 2)
    assert np.isclose(squared[1].getValue(), np.sin(14) ** 2)
    expected = np.array([
        [2 * np.sin(40) * np.cos(40) * 10, 2 * np.sin(40) * np.cos(40) * 4],
        [2 * np.sin(14) * np.cos(14), 2 * np.sin(14) * np.cos(14)],
    ])
    assert np.isclose(ad.get_jacobian(squared, ['x', 'y']), expected).all()
def test_pow():
    """Check `vector ** exponent` for numeric and autodiff exponents."""
    # create_vector vector raised to a plain number.
    base = ad.create_vector('v', [2, 5])
    squared = base ** 2
    assert squared[0].getValue() == 4
    assert squared[1].getValue() == 25
    expected = np.array([[4, 0], [0, 10]])
    assert np.array_equal(ad.get_jacobian(squared, ['v1', 'v2']), expected)

    # Same computation on a hand-built array of Scalars.
    x = ad.Scalar('x', 2)
    y = ad.Scalar('y', 5)
    base = np.array([x, y])
    squared = base ** 2
    assert squared[0].getValue() == 4
    assert squared[1].getValue() == 25
    expected = np.array([[4, 0], [0, 10]])
    assert np.array_equal(ad.get_jacobian(squared, ['x', 'y']), expected)

    # Nested powers: (v^2)^3 = v^6, derivative 6 v^5.
    x = ad.Scalar('x', 2)
    y = ad.Scalar('y', 3)
    base = np.array([x, y])
    sixth = (base ** 2) ** 3
    assert sixth[0].getValue() == 64
    assert sixth[1].getValue() == 729
    expected = np.array([[6 * (2 ** 5), 0], [0, 6 * (3 ** 5)]])
    assert np.array_equal(ad.get_jacobian(sixth, ['x', 'y']), expected)

    # Exponent array mixing a Scalar with a plain number: x^y and y^2.
    x = ad.Scalar('x', 2)
    y = ad.Scalar('y', 3)
    base = np.array([x, y])
    exponent = np.array([y, 2])
    powered = base ** exponent
    assert powered[0].getValue() == 8
    assert powered[1].getValue() == 9
    expected = np.array([[12, np.log(2) * 8], [0, 6]])
    assert np.array_equal(ad.get_jacobian(powered, ['x', 'y']), expected)
def test_cos():
    """Check ad.cos on a vector: values and the diagonal -sin Jacobian."""
    # Mirrors the basic case of the sine test.
    angles = ad.create_vector('v', [0, 100])
    cosine = ad.cos(angles)
    assert cosine[0].getValue() == 1
    assert np.isclose(cosine[1].getValue(), np.cos(100))

    # d/dx cos(x) = -sin(x); at x = 0 the derivative is 0.
    expected = np.array([[0, 0], [0, -np.sin(100)]])
    jacobian = ad.get_jacobian(cosine, ['v1', 'v2'])
    assert np.isclose(jacobian, expected).all()
def L_fun(x):
    """
    Action of the Jacobian on 'x'.

    Evaluates 'f' on a vector named 'x0' whose values are 'x0' and whose
    seed derivatives are the entries of 'x', then reduces the resulting
    derivatives with 'sum_values' to obtain J_f(x0)*x.

    NOTE(review): 'f', 'x0' and 'sum_values' are not defined in this
    function; they are presumably supplied by an enclosing scope -- confirm.
    """
    seeded_input = ad.create_vector('x0', x0, seed_vector=x)
    # f may hand back a list; wrap it so the vector helpers accept it.
    outputs = np.array(f(seeded_input))
    return sum_values(ad.get_deriv(outputs))
def test_neg():
    """Check unary negation of autodiff vectors, including double negation."""
    # Single negation flips both the values and the derivatives.
    vec = ad.create_vector('v', [1, 2])
    negated = -vec
    assert negated[0].getValue() == -1
    assert negated[1].getValue() == -2
    expected = np.array([[-1, 0], [0, -1]])
    assert np.array_equal(ad.get_jacobian(negated, ['v1', 'v2']), expected)

    # Negating again restores the original vector.
    restored = -negated
    assert restored[0].getValue() == 1
    assert restored[1].getValue() == 2
    expected = np.array([[1, 0], [0, 1]])
    assert np.array_equal(ad.get_jacobian(restored, ['v1', 'v2']), expected)

    # Negation combined with multiplication by -1.
    vec = ad.create_vector('v', [1, 2])
    restored = -1 * -vec
    assert restored[0].getValue() == 1
    assert restored[1].getValue() == 2
    expected = np.array([[1, 0], [0, 1]])
    assert np.array_equal(ad.get_jacobian(restored, ['v1', 'v2']), expected)
def test_rpow():
    """Check `number ** vector` (reflected power) values and Jacobians."""
    # 2 ** v on a create_vector vector; derivative is ln(2) * 2^v.
    exponent = ad.create_vector('v', [2, 5])
    powered = 2 ** exponent
    assert powered[0].getValue() == 4
    assert powered[1].getValue() == 32
    expected = np.array([[np.log(2) * 4, 0], [0, np.log(2) * 32]])
    assert np.array_equal(ad.get_jacobian(powered, ['v1', 'v2']), expected)

    # Same computation on a hand-built array of Scalars.
    x = ad.Scalar('x', 2)
    y = ad.Scalar('y', 5)
    exponent = np.array([x, y])
    powered = 2 ** exponent
    assert powered[0].getValue() == 4
    assert powered[1].getValue() == 32
    expected = np.array([[np.log(2) * 4, 0], [0, np.log(2) * 32]])
    assert np.array_equal(ad.get_jacobian(powered, ['x', 'y']), expected)

    # 2 ** (2 v): chain rule contributes a factor of 2.
    x = ad.Scalar('x', 2)
    y = ad.Scalar('y', 3)
    exponent = np.array([x, y])
    powered = 2 ** (2 * exponent)
    assert powered[0].getValue() == 16
    assert powered[1].getValue() == 64
    expected = np.array([[np.log(2) * 32, 0], [0, np.log(2) * 128]])
    assert np.array_equal(ad.get_jacobian(powered, ['x', 'y']), expected)

    # (2 ** 2) ** v: base 4 written as a power of 2.
    x = ad.Scalar('x', 2)
    y = ad.Scalar('y', 3)
    exponent = np.array([x, y])
    powered = (2 ** 2) ** exponent
    assert powered[0].getValue() == 16
    assert powered[1].getValue() == 64
    expected = np.array([
        [np.log(2) * (2 ** 4) * 2, 0],
        [0, np.log(2) * (2 ** 6) * 2],
    ])
    assert np.array_equal(ad.get_jacobian(powered, ['x', 'y']), expected)

    # Base 4 with exponents that are expressions in x and y.
    x = ad.Scalar('x', 2)
    y = ad.Scalar('y', 3)
    exponent = np.array([x + y, x])
    powered = (2 ** 2) ** exponent
    assert powered[0].getValue() == 2 ** 10
    assert powered[1].getValue() == 16
    expected = np.array([
        [np.log(2) * (2 ** 10) * 2, np.log(2) * (2 ** 10) * 2],
        [np.log(2) * (2 ** 4) * 2, 0],
    ])
    assert np.array_equal(ad.get_jacobian(powered, ['x', 'y']), expected)

    # Equivalent formulation 2 ** (2 * expr) must give the same results.
    x = ad.Scalar('x', 2)
    y = ad.Scalar('y', 3)
    exponent = np.array([x + y, x])
    powered = 2 ** (2 * exponent)
    assert powered[0].getValue() == 2 ** 10
    assert powered[1].getValue() == 16
    expected = np.array([
        [np.log(2) * (2 ** 10) * 2, np.log(2) * (2 ** 10) * 2],
        [np.log(2) * (2 ** 4) * 2, 0],
    ])
    assert np.array_equal(ad.get_jacobian(powered, ['x', 'y']), expected)
def test_exp():
    """Check ad.exp on vectors, including scaled and composite arguments."""
    # exp(v): the derivative equals the value.
    vec = ad.create_vector('v', [2, 5])
    result = ad.exp(vec)
    assert np.isclose(result[0].getValue(), np.exp(2))
    assert np.isclose(result[1].getValue(), np.exp(5))
    expected = np.array([[np.exp(2), 0], [0, np.exp(5)]])
    assert np.array_equal(ad.get_jacobian(result, ['v1', 'v2']), expected)

    # exp(2 v): chain rule contributes a factor of 2.
    vec = ad.create_vector('v', [2, 5])
    result = ad.exp(2 * vec)
    assert np.isclose(result[0].getValue(), np.exp(4))
    assert np.isclose(result[1].getValue(), np.exp(10))
    expected = 2 * np.array([[np.exp(4), 0], [0, np.exp(10)]])
    assert np.array_equal(ad.get_jacobian(result, ['v1', 'v2']), expected)

    # exp of expressions in x and y: exp(x + y) and exp(x * y).
    x = ad.Scalar('x', 2)
    y = ad.Scalar('y', 3)
    result = ad.exp(np.array([x + y, x * y]))
    assert np.isclose(result[0].getValue(), np.exp(5))
    assert np.isclose(result[1].getValue(), np.exp(6))
    expected = np.array([
        [np.exp(5), np.exp(5)],
        [3 * np.exp(6), 2 * np.exp(6)],
    ])
    assert np.array_equal(ad.get_jacobian(result, ['x', 'y']), expected)
def test_sub():
    """Check values and Jacobians of elementwise differences of vectors."""
    # Two arrays built from the same Scalars in swapped order.
    x = ad.Scalar('x', 1)
    y = ad.Scalar('y', 2)
    lhs = np.array([x, y])
    rhs = np.array([y, x])
    diff = lhs - rhs
    assert diff[0].getValue() == -1
    assert diff[1].getValue() == 1
    expected = np.array([[1, -1], [-1, 1]])
    assert np.array_equal(ad.get_jacobian(diff, ['x', 'y']), expected)

    # Vector minus a plain number leaves the derivatives untouched.
    vec = ad.create_vector('v', [1, 2])
    shifted = vec - 10
    assert shifted[0].getValue() == -9
    assert shifted[1].getValue() == -8
    expected = np.array([[1, 0], [0, 1]])
    assert np.array_equal(ad.get_jacobian(shifted, ['v1', 'v2']), expected)

    # Array of Scalars minus a create_vector vector; the subtrahend's
    # variables get derivative -1.
    x = ad.Scalar('x', 1)
    y = ad.Scalar('y', 2)
    lhs = np.array([x, y])
    rhs = ad.create_vector('v', [1, 5])
    diff = lhs - rhs
    assert diff[0].getValue() == 0
    assert diff[1].getValue() == -3
    expected = np.array([[1, 0, -1, 0], [0, 1, 0, -1]])
    assert np.array_equal(
        ad.get_jacobian(diff, ['x', 'y', 'v1', 'v2']), expected)

    # Subtrahend mixes a Scalar with a plain number.
    x = ad.Scalar('x', 1)
    y = ad.Scalar('y', 2)
    lhs = np.array([x, y])
    rhs = np.array([y, 10])
    diff = lhs - rhs
    assert diff[0].getValue() == -1
    assert diff[1].getValue() == -8
    expected = np.array([[1, -1], [0, 1]])
    assert np.array_equal(ad.get_jacobian(diff, ['x', 'y']), expected)
def gradient_descent(f, intial_guess, step_size=0.01, max_iter=10000, tol=1e-12):
    """
    Implements gradient descent with a constant step size.

    INPUTS
    =======
    f: function
    The function that we are trying to find the minimum of. The function
    must take in a single list/array that has the same dimension as
    len(intial_guess) and return an object exposing getGradient(names).

    intial_guess: List or array of ints/floats
    The initial position to begin the search for the minimum of the
    function 'f'. (The spelling of this parameter name is kept as-is so
    existing keyword callers keep working.)

    step_size: float
    The step size. In this case the step size will be constant.

    max_iter: int
    The max number of iterations.

    tol: float
    The tolerance. If the Euclidean norm of the gradient is less than the
    tolerance, the algorithm will stop.

    RETURNS
    ========
    Tuple
    A tuple with first entry which maps to the position of the minimum and
    second entry which maps to the number of iterations it took for the
    algorithm to stop. If max_iter is 0 no iteration is performed and the
    result is (initial position, 0).
    """
    x = np.array(intial_guess)
    # The variable names are fixed by the dimension, so build them once.
    var_names = ['x{}'.format(k) for k in range(1, len(x) + 1)]

    # Pre-set so that an empty loop (max_iter == 0) still has a count to
    # return instead of hitting an unbound loop variable.
    num_iter = 0
    for num_iter in range(1, max_iter + 1):
        fn_at_x = f(ad.create_vector('x', x))
        gradient = fn_at_x.getGradient(var_names)
        # Stop on the actual 2-norm of the gradient. The previous test,
        # sqrt(sum(|g|)) < tol, is not a norm and is equivalent to
        # sum(|g|) < tol**2, which is practically unreachable in floating
        # point for the default tolerance.
        if np.linalg.norm(gradient) < tol:
            break
        x = x - step_size * gradient
    return (x, num_iter)
def line_search(f, x, p, tau=0.1, c=0.1, alpha=1):
    """
    Implements Backtracking Line Search.
    https://en.wikipedia.org/wiki/Backtracking_line_search

    Starting from 'alpha', the step is repeatedly multiplied by 'tau' for as
    long as the decrease f(x) - f(x + alpha * p) is smaller than
    alpha * (-c * m), where m is the sum of the elementwise product of 'p'
    and the gradient of 'f' at 'x'.

    INPUTS
    =======
    f: Function
    The function that we are trying to find the minimum of. It is called
    with a single vector argument.

    x: List or array of ints/floats
    The initial position

    p: numpy array
    Descent direction

    tau: float
    Factor applied to alpha on every backtracking step

    c: float
    Factor scaling the required decrease

    alpha: float
    Starting alpha

    RETURNS
    ========
    float
    The alpha we found through backtracking line search
    """
    x_vec = ad.create_vector('x', x)
    f_start = f(x_vec)
    f_trial = f(x_vec + alpha * p)

    names = ['x{}'.format(k) for k in range(1, len(x_vec) + 1)]
    directional_slope = (p * f_start.getGradient(names)).sum()
    required_decrease = -c * directional_slope

    while True:
        achieved = ad.get_value(f_start - f_trial)
        if not (achieved < alpha * required_decrease):
            return alpha
        # Not enough decrease yet: shrink the step and re-evaluate.
        alpha = tau * alpha
        f_trial = f(x_vec + alpha * p)
def quasi_newtons_method(f, initial_guess, max_iter=10000, method='BFGS', tol=1e-12):
    """
    Implements Quasi-Newton methods with different methods to estimate the
    inverse of the Hessian. Utilizes backtracking line search to determine
    step size.
    https://en.wikipedia.org/wiki/Quasi-Newton_method

    INPUTS
    =======
    f: function
    The function that we are trying to find the minimum of. The function
    must take in a single list/array that has the same dimension as
    len(initial_guess) and return an object exposing getGradient(names).

    initial_guess: List or array of ints/floats
    The initial position to begin the search for the minimum of the
    function 'f'.

    max_iter: int
    The max number of iterations.

    method: String
    The update method to update the estimate of the inverse of the Hessian.
    Currently, BFGS, DFP, and Broyden are implemented.

    tol: float
    The tolerance. If the Euclidean norm of the gradient is less than the
    tolerance, the algorithm will stop.

    RETURNS
    ========
    Tuple
    A tuple with first entry which maps to the position of the minimum and
    second entry which maps to the number of iterations it took for the
    algorithm to stop. If max_iter is 0 no iteration is performed and the
    result is (initial_guess, 0).

    RAISES
    ========
    Exception
    If 'method' is not one of 'BFGS', 'DFP' or 'Broyden'.
    """
    if method not in ['BFGS', 'DFP', 'Broyden']:
        raise Exception("Not a valid method.")

    x = initial_guess
    dim = len(x)
    var_names = ['x{}'.format(k) for k in range(1, dim + 1)]
    identity = np.identity(dim)
    # Start from the identity as the estimate of the inverse Hessian.
    H = np.identity(dim)

    # Gradient at the starting point. Later iterations reuse the gradient
    # computed at the end of the previous one instead of re-evaluating f at
    # the same position.
    gradient = f(ad.create_vector('x', x)).getGradient(var_names)

    # Pre-set so that an empty loop (max_iter == 0) still has a count to
    # return instead of hitting an unbound loop variable.
    num_iter = 0
    for num_iter in range(1, max_iter + 1):
        p = -H @ gradient
        alpha = line_search(f, x, p)
        delta_x = alpha * p
        x = x + delta_x

        gradient2 = f(ad.create_vector('x', x)).getGradient(var_names)
        # Stop on the actual 2-norm of the gradient. The previous test,
        # sqrt(sum(|g|)) < tol, is not a norm and is equivalent to
        # sum(|g|) < tol**2, which is practically unreachable in floating
        # point for the default tolerance.
        if np.linalg.norm(gradient2) < tol:
            break

        # Column vectors so the outer products below have shape (dim, dim).
        y = (gradient2 - gradient).reshape(-1, 1)
        delta_x = delta_x.reshape(-1, 1)

        if method == 'BFGS':
            curvature = y.T @ delta_x
            H = (identity - (delta_x @ y.T) / curvature) @ H \
                @ (identity - (y @ delta_x.T) / curvature) \
                + (delta_x @ delta_x.T) / curvature
        elif method == 'DFP':
            H = H + (delta_x @ delta_x.T) / (delta_x.T @ y) \
                - (H @ y @ y.T @ H) / (y.T @ H @ y)
        elif method == 'Broyden':
            H = H + ((delta_x - H @ y) @ delta_x.T @ H) / (delta_x.T @ H @ y)

        gradient = gradient2
    return (x, num_iter)
def newtons_method(f, initial_guess, max_iter=1000, method='exact', tol=1e-12):
    """
    Implements Newton's method for root-finding with different methods to
    find the step at each iteration.

    INPUTS
    =======
    f: Function
    The function that we are trying to find a root of. The function must
    take in a single list/array that has the same dimension as
    len(initial_guess) and return a list/array of the same length.

    initial_guess: List or array of ints/floats
    The initial position to begin the search for the roots of the
    function 'f'.

    max_iter: int
    The max number of iterations.

    method: String
    The method to solve Ax=b to find the step 'x' at each iteration.
    Options:
    'inverse' : invert the (negated) Jacobian with np.linalg.inv and
    multiply by the function value
    'exact' : use np.linalg.solve
    'gmres' : use gmres, which finds a solution iteratively
    'gmres_action' : delegate the whole search to
    _newtons_method_gmres_action

    tol: float
    The tolerance. If the abs values of all the steps for one iteration are
    less than the tol, then the algorithm stops. Also forwarded to gmres.

    RETURNS
    ========
    Tuple
    A tuple with first entry which maps to the position found and second
    entry which maps to the number of iterations it took for the algorithm
    to stop.

    RAISES
    ========
    Exception
    If 'method' is not one of the options above, or if the output of 'f'
    does not have the same length as 'initial_guess'.
    RuntimeError
    If convergence is not reached within 'max_iter' iterations.
    """
    if method not in ['inverse', 'exact', 'gmres', 'gmres_action']:
        raise Exception("Not a valid method.")

    if len(f(initial_guess)) != len(initial_guess):
        raise Exception(
            'Output dimension of f should be the same as the input dimension of f.'
        )

    if method == 'gmres_action':
        return _newtons_method_gmres_action(f, initial_guess, max_iter, tol)

    current = ad.create_vector('x0', initial_guess)
    for iteration in range(1, max_iter + 1):
        # f may hand back a list; wrap it so the vector helpers accept it.
        outputs = np.array(f(current))
        var_names = ['x0{}'.format(k) for k in range(1, len(outputs) + 1)]
        jacobian = ad.get_jacobian(outputs, var_names)
        residual = ad.get_value(outputs)

        # Solve J * step = -f(x) with the requested solver.
        if method == 'inverse':
            step = np.linalg.inv(-jacobian).dot(residual)
        elif method == 'exact':
            step = np.linalg.solve(-jacobian, residual)
        elif method == 'gmres':
            step, _ = gmres(jacobian, -residual, tol=tol, atol='legacy')

        candidate = current + step
        displacement = np.abs(ad.get_value(candidate) - ad.get_value(current))
        if np.all(displacement < tol):
            return (ad.get_value(candidate), iteration)

        # Not converged yet: continue from the new position.
        current = candidate

    raise RuntimeError(
        "Failed to converge after {0} iterations, value is {1}".format(
            max_iter, ad.get_value(current)))