Example No. 1
def least_squares_GD(y, tx, initial_w, max_iters, gamma, printing=True):
    """Linear regression using gradient descent for max_iters iteration given
    the input labelled data y, tx with initial_w and gamma as the initial weight and the
    learning rate respectively. Return final weights and loss"""
    
    w = initial_w
    losses = []
    
    # Threshold to stop before max_iters once the loss has effectively converged
    thres = 1e-8 
    
    # Gradient descent iterations
    for n_iter in range(max_iters):
        
        # Compute gradient, loss and update w
        _, grad = compute_gradient(y, tx, w)
        loss = compute_loss(y, tx, w)
        w = w - gamma*grad
        
        losses.append(loss)
        
        
        # If the difference between the last two losses drops below thres, the
        # gradient of the loss is close to zero and we are near the optimum.
        if len(losses) > 1 and np.abs(losses[-1] - losses[-2]) < thres:
            break

        if printing:
            print("Gradient Descent({bi}/{ti}): loss={l}, weights={we}".format(
                bi=n_iter, ti=max_iters - 1, l=loss, we=w))
        
    return w, losses[-1]
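Example No. 1 (and Example No. 3 below) rely on compute_gradient and compute_loss helpers that are not shown; from the unpacking _, grad = compute_gradient(...) they appear to return the residual vector first and the gradient second. A minimal MSE-based sketch under that assumption (not the original helpers):

import numpy as np

def compute_loss(y, tx, w):
    """MSE loss: (1 / 2N) * sum of squared residuals."""
    e = y - tx.dot(w)
    return e.dot(e) / (2 * len(y))

def compute_gradient(y, tx, w):
    """Return the residual vector e and the MSE gradient -(1/N) * tx^T e."""
    e = y - tx.dot(w)
    grad = -tx.T.dot(e) / len(y)
    return e, grad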
Example No. 2
def least_squares_GD(y, tx, initial_w, max_iters, gamma):
    """Gradient descent algorithm."""
    
    w = initial_w
    losses = []
    
    for n_iter in range(max_iters):

        # compute gradient and loss
        gradient = compute_gradient(y, tx, w)[0]

        # update w by gradient
        w = w - gamma*gradient
        loss = compute_mse(y, tx, w)  
        losses.append(loss)
            
    loss = losses[-1]
      
    return w, loss
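Examples No. 2 and No. 4 instead call compute_mse and take compute_gradient(...)[0] as the gradient, so their helper apparently returns the gradient first. A minimal sketch consistent with that convention (an assumption, not the original code):

import numpy as np

def compute_mse(y, tx, w):
    """MSE loss: (1 / 2N) * sum of squared residuals."""
    e = y - tx.dot(w)
    return e.dot(e) / (2 * len(y))

def compute_gradient(y, tx, w):
    """Return the MSE gradient -(1/N) * tx^T e, followed by the residuals."""
    e = y - tx.dot(w)
    return -tx.T.dot(e) / len(y), e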
Example No. 3
def least_squares_SGD(y, tx, initial_w, batch_size, max_iters, gamma, printing=True):
    """Linear regression using stochastic gradient descent for max_iters iteration given
    the input labelled data y, tx with initial_w and gamma as the initial weight and the
    learning rate respectively. Return final weights and loss"""
    
    w = initial_w
    
    # SGD iterations on batches of batch_size size
    for n_iter in range(max_iters):
        for minibatch_y, minibatch_tx in batch_iter(y, tx, batch_size):
        
            # Compute gradient, loss and update w
            _, grad = compute_gradient(minibatch_y, minibatch_tx, w)
            loss = compute_loss(minibatch_y, minibatch_tx, w)
            w = w - gamma*grad
        
        if printing:
            print("SGD({bi}/{ti}): loss={l}, weights={w}".format(
                bi=n_iter, ti=max_iters - 1, l=loss, w=w))   
    
    return w, loss
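batch_iter is used above but not defined; a minimal sketch of a shuffled mini-batch generator with that name (the original repository's implementation may differ):

import numpy as np

def batch_iter(y, tx, batch_size, num_batches=1, shuffle=True):
    """Yield num_batches mini-batches of size batch_size drawn from (y, tx)."""
    n = len(y)
    idx = np.random.permutation(n) if shuffle else np.arange(n)
    for b in range(num_batches):
        start = (b * batch_size) % n
        end = min(start + batch_size, n)
        yield y[idx[start:end]], tx[idx[start:end]]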
Example No. 4
def least_squares_SGD(y, tx, initial_w, batch_size, max_iters, gamma):
    """Stochastic gradient descent."""

    w = initial_w
    losses = []
    threshold = 1e-6
    n_iter = 0
    
    for y_batch, tx_batch in batch_iter2(y, tx, batch_size=batch_size, num_batches=max_iters):
        
        n_iter += 1

        # compute gradient on the current mini-batch
        gradient = compute_gradient(y_batch, tx_batch, w)[0]

        # update w through the stochastic gradient update
        w = w - gamma * gradient

        # evaluate the loss on the full dataset, not just the mini-batch
        loss = compute_mse(y, tx, w)
        losses.append(loss)
        if n_iter > 1:
            if abs(losses[-2] - losses[-1]) < threshold:
                break
    return w, losses[-1]
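A hedged usage sketch of this variant on toy data, assuming the helper sketches above and that batch_iter2 behaves like batch_iter with an explicit num_batches argument; all names and values below are illustrative only:

import numpy as np

# Hypothetical alias: assume batch_iter2 matches the batch_iter sketch above.
batch_iter2 = batch_iter

rng = np.random.default_rng(0)
x = rng.normal(size=200)
y = 3 * x + 0.5 + 0.1 * rng.normal(size=200)
tx = np.c_[np.ones(200), x]  # design matrix with a bias column

w, loss = least_squares_SGD(y, tx, initial_w=np.zeros(2),
                            batch_size=32, max_iters=1000, gamma=0.05)
print("w =", w, "loss =", loss)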
Example No. 5
')

expected_grads = np.array([
    0.76614, 0.97990, 0.37246, 0.49749, 0.64174, 0.74614, 0.88342, 0.56876,
    0.58467, 0.59814, 1.92598, 1.94462, 1.98965, 2.17855, 2.47834, 2.50225,
    2.52644, 2.72233
])

[
    actual_rolled_grads, actual_reg_w3_grad, actual_reg_w2_grad,
    actual_unreg_w3_grad, actual_unreg_w2_grad, actual_d3, actual_d2
] = compute_gradient(weights,
                     features,
                     outcomes,
                     input_layer_size,
                     hidden_layer_size,
                     output_layer_size,
                     regularization_strength,
                     actv_fn=sigmoid,
                     grad_fn=sigmoid_gradient,
                     testing=True)


def test_fn():
    assert np.allclose(actual_rolled_grads, expected_grads)
    assert np.allclose(expected_d3, actual_d3)
    assert np.allclose(expected_d2, actual_d2)
    assert np.allclose(expected_reg_w3_grad, actual_reg_w3_grad)
    assert np.allclose(expected_reg_w2_grad, actual_reg_w2_grad)
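This test passes sigmoid and sigmoid_gradient as the activation function and its derivative; they are not shown above, but a standard sketch of the pair would be:

import numpy as np

def sigmoid(z):
    """Element-wise logistic function 1 / (1 + exp(-z))."""
    return 1.0 / (1.0 + np.exp(-z))

def sigmoid_gradient(z):
    """Derivative of the logistic function: sigmoid(z) * (1 - sigmoid(z))."""
    s = sigmoid(z)
    return s * (1.0 - s)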
Example No. 6

def norm_diff(numerical_grad, analytical_grad, max_diff):
    print('\nnp.linalg.norm(numerical_grad - analytical_grad) / '
          'np.linalg.norm(numerical_grad + analytical_grad);')
    print('should be less than the max_diff of: ', max_diff)
    diff = (np.linalg.norm(numerical_grad - analytical_grad) /
            np.linalg.norm(numerical_grad + analytical_grad))
    print('diff: ', diff, '\n\n')


[
    features, theta1, theta2, y, input_layer_size, hidden_layer_size,
    output_layer_size
] = create_simple_nn_params()
unrolled_weights = np.hstack([theta1.flatten(), theta2.flatten()])

if __name__ == '__main__':
    # Simple example to test that the gradient checking code runs correctly
    print('running simple grad check')
    grad_check(lambda x: np.sum(x**2), lambda x: x * 2, np.random.randn(4, 5))

    print(
        'running grad check on actual compute_cost and compute_gradient algos')
    grad_check(compute_cost, compute_gradient, unrolled_weights,
               features, y, input_layer_size, hidden_layer_size,
               output_layer_size)
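grad_check itself is not shown; a minimal central-difference sketch with the same calling convention as above (cost function, gradient function, initial parameters, then extra arguments forwarded to both) might look like the following. Treat it as an assumption about the interface, not the original implementation:

import numpy as np

def grad_check(cost_fn, grad_fn, params, *args, eps=1e-4, max_diff=1e-7):
    """Compare grad_fn against a central-difference estimate of cost_fn's gradient."""
    params = np.array(params, dtype=float)                  # contiguous working copy
    analytical = np.asarray(grad_fn(params, *args)).ravel()
    numerical = np.zeros_like(analytical)
    flat = params.ravel()                                   # view onto the copy
    for i in range(flat.size):
        orig = flat[i]
        flat[i] = orig + eps
        cost_plus = cost_fn(params, *args)
        flat[i] = orig - eps
        cost_minus = cost_fn(params, *args)
        flat[i] = orig
        numerical[i] = (cost_plus - cost_minus) / (2 * eps)
    norm_diff(numerical, analytical, max_diff)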
Example No. 7
   1.70629   1.03385   1.10676; \
   1.75400   0.76894   0.77931; \
   1.79442   0.93566   0.96699  \
') / features.shape[0]  # test cases didn't scale by num_examples
expected_grads = np.array([
    0.766138, 0.979897, -0.027540, -0.035844, -0.024929, -0.053862,
    0.883417, 0.568762, 0.584668, 0.598139, 0.459314, 0.344618,
    0.256313, 0.311885, 0.478337, 0.368920, 0.259771, 0.322331
]).reshape(1, 18)
[actual_rolled_grads,
 actual_w3_grad,
 actual_w2_grad,
 actual_unreg_w3_grad,
 actual_unreg_w2_grad,
 actual_d3,
 actual_d2
] = compute_gradient(
        weights,
        features,
        outcomes,
        input_layer_size,
        hidden_layer_size,
        output_layer_size,
        regularization_strength,
        True
    )


assert(np.all(abs(expected_d3 - actual_d3) <= .0001))
assert(np.all(abs(expected_d2 - actual_d2) <= .0001))
assert(np.all(abs(expected_unreg_w3_grad - actual_unreg_w3_grad) <= .0001))
assert(np.all(abs(expected_unreg_w2_grad - actual_unreg_w2_grad) <= .0001))
assert(np.all(abs(actual_rolled_grads - expected_grads) <= .0001))