def minimize_stochastic(target_fn, gradient_fn, x, y, theta_0, alpha_0=0.01):
    data = list(zip(x, y))   # materialize so we can iterate over it more than once
    theta = theta_0          # initial guess
    alpha = alpha_0          # initial step size
    min_theta, min_value = None, float("inf")   # the minimum so far
    iterations_with_no_improvement = 0

    # if we ever go 100 iterations with no improvement, stop
    while iterations_with_no_improvement < 100:
        value = sum(target_fn(x_i, y_i, theta) for x_i, y_i in data)

        if value < min_value:
            # if we've found a new minimum, remember it
            # and go back to the original step size
            min_theta, min_value = theta, value
            iterations_with_no_improvement = 0
            alpha = alpha_0
        else:
            # otherwise we're not improving, so try shrinking the step size
            iterations_with_no_improvement += 1
            alpha *= 0.9

        # and take a gradient step for each of the data points
        for x_i, y_i in in_random_order(data):
            gradient_i = gradient_fn(x_i, y_i, theta)
            theta = vector_subtract(theta, scalar_multiply(alpha, gradient_i))

    return min_theta
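# A minimal usage sketch for minimize_stochastic (not from the original source):
# fit y ~ intercept + slope * x by least squares. It assumes the helpers the
# function relies on (in_random_order, vector_subtract, scalar_multiply) are
# importable from the surrounding linear-algebra module; the names below
# (squared_error, squared_error_gradient) are illustrative only.
import random

def squared_error(x_i, y_i, theta):
    intercept, slope = theta
    return (y_i - (intercept + slope * x_i)) ** 2

def squared_error_gradient(x_i, y_i, theta):
    intercept, slope = theta
    error = (intercept + slope * x_i) - y_i
    return [2 * error, 2 * error * x_i]   # partials w.r.t. intercept and slope

xs = [i / 10 for i in range(-10, 11)]
ys = [5 + 3 * x_i + random.uniform(-0.1, 0.1) for x_i in xs]
theta = minimize_stochastic(squared_error, squared_error_gradient,
                            xs, ys, theta_0=[random.random(), random.random()])
# theta should end up close to [5, 3]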
def gradient_step(v: Vector, gradient: Vector, step_size: float) -> Vector:
    """Moves `step_size` in the `gradient` direction from `v`,
    returning a new vector"""
    assert len(v) == len(gradient)
    update = scalar_multiply(step_size, gradient)
    return add(v, update)
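# A minimal sketch (not from the original source) of gradient_step used to
# minimize f(v) = sum(v_i ** 2). The add/scalar_multiply definitions below are
# stand-ins for the helpers gradient_step expects; swap in your own imports.
from typing import List

Vector = List[float]

def add(v: Vector, w: Vector) -> Vector:              # stand-in helper
    return [v_i + w_i for v_i, w_i in zip(v, w)]

def scalar_multiply(c: float, v: Vector) -> Vector:   # stand-in helper
    return [c * v_i for v_i in v]

def sum_of_squares_gradient(v: Vector) -> Vector:
    return [2 * v_i for v_i in v]

v = [4.0, -3.0, 2.0]
for _ in range(1000):
    grad = sum_of_squares_gradient(v)
    v = gradient_step(v, grad, -0.01)   # negative step size: move against the gradient
# v should now be very close to [0.0, 0.0, 0.0]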
def test_minimize_batch(self):
    optimized = gradient_descent.minimize_batch(
        lambda v: v[0] ** 2 + v[1] ** 2 + v[2] ** 2,
        lambda v: scalar_multiply(2, v),   # 2*[x y z] = [2x 2y 2z]
        [3, 2, 1],
        tolerance=0.000001)
    for index, value in enumerate(optimized):
        self.assertAlmostEqual(
            0, value, places=2,
            msg='Value {0} not optimized to 0 for dimension of index: {1}'.format(value, index))
def test_maximize_batch(self):
    # this paraboloid is maximized at [-1, -1]
    optimized = gradient_descent.maximize_batch(
        lambda v: -((v[0] + 1) ** 2 + (v[1] + 1) ** 2),         # f(x,y) = -((x+1)**2 + (y+1)**2)
        lambda v: scalar_multiply(-2, vector_add(v, [1, 1])),   # f'(x,y) = [-2(x+1), -2(y+1)]
        [3, 2],
        tolerance=0.000001)
    for index, value in enumerate(optimized):
        self.assertAlmostEqual(
            -1, value, places=2,
            msg='Value {0} not optimized to -1 for dimension of index: {1}'.format(value, index))
def find_eigenvector(A, tolerance=0.00001):
    """use power iteration to find the dominant eigenvector/eigenvalue of A"""
    guess = [random.random() for _ in A]

    while True:
        result = matrix_operate(A, guess)
        length = magnitude(result)
        next_guess = scalar_multiply(1 / length, result)   # rescale to a magnitude of 1

        if distance(guess, next_guess) < tolerance:
            return next_guess, length   # eigenvector, eigenvalue

        guess = next_guess
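# Quick sanity check (not from the original source) for find_eigenvector, which
# implements power iteration. Assumes random is imported and that matrix_operate,
# magnitude, distance and scalar_multiply are importable from the same module
# as the function above.
m = [[3, 1],
     [1, 3]]
eigenvector, eigenvalue = find_eigenvector(m)
# eigenvalue should be close to 4 (the dominant eigenvalue of m) and
# eigenvector close to [0.707, 0.707], i.e. the unit vector along [1, 1]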
def project(vector, direction_vector):
    """return the projection of vector onto direction_vector
    (assumes direction_vector has magnitude 1)"""
    projection_length = dot(vector, direction_vector)
    return scalar_multiply(projection_length, direction_vector)
def project2(v: Vector, w: Vector) -> Vector:
    """return the projection of v onto the direction w"""
    projection_length = dot(v, w)
    return scalar_multiply(projection_length, w)
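# Small usage sketch (not from the original source) for project2. dot(v, w) is
# the projection length only when w has magnitude 1, so w must be a unit vector.
# Assumes dot and scalar_multiply are importable from the surrounding module.
w = [1.0, 0.0]                    # unit vector along the x-axis
print(project2([3.0, 4.0], w))    # [3.0, 0.0]: the component of [3, 4] along w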
def test_scalar_multiply(self):
    self.assertEqual([-3, 9, 0], linalg.scalar_multiply(3, [-1, 3, 0]))
    self.assertEqual([], linalg.scalar_multiply(3, []))
def _grad_step(beta: Vector, grad: Vector, lr: float = 10**-3) -> Vector:
    # repeating this to check my knowledge of the gradient update
    update: Vector = scalar_multiply(-1 * lr, grad)
    return vector_sum([beta, update])
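# One-step sanity check (not from the original source) for _grad_step: with
# f(beta) = sum(beta_i ** 2) the gradient is 2 * beta, so a single step with
# lr = 0.1 shrinks each coordinate by 20%. Assumes scalar_multiply and
# vector_sum are importable from the surrounding module.
beta = [1.0, -2.0]
grad = [2 * b for b in beta]            # gradient of the sum of squares at beta
print(_grad_step(beta, grad, lr=0.1))   # [0.8, -1.6]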