def minimize_stochastic(target_fn, gradient_fn, x, y, theta_0, alpha_0=0.01):
    data = list(zip(x, y))                      # materialize so we can iterate over it repeatedly
    theta = theta_0                             # initial guess
    alpha = alpha_0                             # initial step size
    min_theta, min_value = None, float("inf")   # the minimum so far
    iterations_with_no_improvement = 0

    # if we ever go 100 iterations with no improvement, stop
    while iterations_with_no_improvement < 100:
        value = sum(target_fn(x_i, y_i, theta) for x_i, y_i in data)

        if value < min_value:
            # if we've found a new minimum, remember it
            # and go back to the original step size
            min_theta, min_value = theta, value
            iterations_with_no_improvement = 0
            alpha = alpha_0
        else:
            # otherwise we're not improving, so try shrinking the step size
            iterations_with_no_improvement += 1
            alpha *= 0.9

        # and take a gradient step for each of the data points
        # (la is assumed to be the linear-algebra helper module imported elsewhere)
        for x_i, y_i in in_random_order(data):
            gradient_i = gradient_fn(x_i, y_i, theta)
            theta = la.vector_subtract(theta, la.scalar_multiply(alpha, gradient_i))

    return min_theta
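minimize_stochastic relies on an in_random_order helper that is not defined in this section. A minimal sketch, assuming it simply yields the data points in a freshly shuffled order on each pass:

import random

def in_random_order(data):
    """ generator that returns the elements of data in random order """
    indexes = [i for i, _ in enumerate(data)]   # build a list of indexes
    random.shuffle(indexes)                     # shuffle them
    for i in indexes:                           # yield the data in that order
        yield data[i]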
def covariance(x, y):
    """ assumes x and y have the same number of elements, at least two """
    n = len(x)
    return alg.dot(de_mean(x), de_mean(y)) / (n - 1)
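covariance (and variance below) depends on a de_mean helper that is not defined here. A minimal sketch, assuming it recenters a list so that its mean is zero:

def de_mean(x):
    """ translate x by subtracting its mean (so the result has mean 0) """
    x_bar = sum(x) / len(x)
    return [x_i - x_bar for x_i in x]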
""" move step_size in the direction from v """ return [v_i + step_size * direction_i for v_i, direction_i in zip(v, direction)] def sum_of_squares_gradient(v): return [2 * v_i for v_i in v] # pick a random starting point v = [random.randint(-10, 10) for i in range(3)] tolerance = 0.0000001 while True: gradient = sum_of_squares_gradient(v) # compute the gradient at v next_v = step(v, gradient, -0.01) # take a negative gradient step if la.distance(next_v, v) < tolerance: break v = next_v # continue if we're not print(v) def safe(f): """ return a new function that's the same as f, except that it outputs infinity whenever f produces an error """ def safe_f(*args, **kwargs): try: return f(*args, **kwargs) except: return float('inf') # this means infinity in Python return safe_f
def variance(x):
    """ assumes x has at least two elements """
    n = len(x)
    deviations = de_mean(x)
    return alg.sum_of_squares(deviations) / (n - 1)
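As a quick sanity check, assuming de_mean and alg.sum_of_squares behave as described, the sample variance of a small list can be worked out by hand:

data = [1, 2, 3, 4, 5]       # mean is 3
# deviations are [-2, -1, 0, 1, 2]; their sum of squares is 10
print(variance(data))        # 10 / (5 - 1) = 2.5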