Example #1
def least_squares_SGD(y, tx, initial_w, max_iters, gamma):
    '''Apply stochastic gradient descent to minimize the mean squared error loss
    for labels y and training data tx.

    Parameters:
      y = labels, numpy column vector
      tx = data in matrix form (first column = 1 for the bias term), one data
        entry per row, numpy multidimensional array
      initial_w = initial values for the weights, numpy column vector
      max_iters = number of steps of stochastic gradient descent;
        must be > 0 to return a meaningful loss
      gamma = learning step

    Returns the weights corresponding to the last step and the associated loss.
    '''

    assert max_iters > 0

    w = initial_w
    for i in range(max_iters):
        for minibatch_y, minibatch_x in batch_iter(y,
                                                   tx,
                                                   batch_size=1,
                                                   num_batches=1):
            # compute the stochastic gradient on the sampled minibatch
            gradient = compute_mean_squares_gradient(minibatch_y, minibatch_x,
                                                     w)
            # update parameters
            w = w - gamma * gradient
            # loss of the updated weights, so the returned pair matches
            loss = compute_mse_loss(y, tx, w)

    return (w, loss)
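Nearly every example on this page relies on a batch_iter helper that is never shown. Its exact definition varies from project to project, but from the call sites (positional batch_size, optional num_batches and shuffle, yielding (minibatch_y, minibatch_tx) pairs) a minimal sketch could look like the following; treat it as an assumption, not the canonical helper of any of these repositories.

import numpy as np

def batch_iter(y, tx, batch_size, num_batches=1, shuffle=True):
    """Yield (minibatch_y, minibatch_tx) pairs sampled from (y, tx).

    A hypothetical sketch of the helper the examples assume; real
    implementations may differ in shuffling and end-of-data handling.
    """
    data_size = len(y)
    if shuffle:
        idx = np.random.permutation(data_size)
        y, tx = y[idx], tx[idx]
    for batch_num in range(num_batches):
        # wrap around so num_batches may exceed data_size // batch_size
        start = (batch_num * batch_size) % data_size
        end = min(start + batch_size, data_size)
        yield y[start:end], tx[start:end]

With batch_size=1 and num_batches=1, the most common call above, each outer iteration draws a single random sample, which is plain SGD.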
def stochastic_gradient_descent(y, tx, initial_w, batch_size, max_iters,
                                gamma):
    """Stochastic gradient descent."""
    # Define parameters to store w and loss
    ws = [initial_w]
    losses = []
    w = initial_w

    for n_iter in range(max_iters):
        for y_batch, tx_batch in batch_iter(y,
                                            tx,
                                            batch_size=batch_size,
                                            num_batches=1):
            # compute a stochastic gradient and loss
            grad, _ = compute_stoch_gradient(y_batch, tx_batch, w)
            # update w through the stochastic gradient update
            w = w - gamma * grad
            # calculate loss
            loss = compute_loss(y, tx, w)
            # store w and loss
            ws.append(w)
            losses.append(loss)

        print("SGD({bi}/{ti}): loss={l}, w0={w0}, w1={w1}".format(
            bi=n_iter, ti=max_iters - 1, l=loss, w0=w[0], w1=w[1]))
    return losses, ws
def least_squares_SGD(y, tx, initial_w, max_iters, gamma, batch_size=1, lambda_=0, min_loss_threshold=0):
    """ Linear regression using stochastic gradient descent """

    # print the status of gradient descent whenever n_iter is a multiple of this
    print_step = np.maximum(int(max_iters / 10), 1)

    w = initial_w

    loss_change = min_loss_threshold + 1
    loss = compute_loss_least_squares(y, tx, w, lambda_)
    n_iter = 0
    while (n_iter < max_iters) and (loss_change > min_loss_threshold):
        for minibatch_y, minibatch_tx in batch_iter(y, tx, batch_size):
            if n_iter >= max_iters:
                break

            grad = compute_gradient_least_squares(minibatch_y, minibatch_tx, w, lambda_)
            w = w - gamma * grad

            old_loss = loss
            loss = compute_loss_least_squares(y, tx, w, lambda_)

            loss_change = np.abs(loss - old_loss)
            if loss_change <= min_loss_threshold:
                break

            if n_iter % print_step == 0:
                print("Gradient Descent({bi}/{ti}): changeInLoss={lc}, loss={l}, w0={w0}, w1={w1}".format(
                    lc=loss_change, bi=n_iter, ti=max_iters - 1, l=loss, w0=w[0], w1=w[1]))

            n_iter = n_iter + 1

    return (w, loss)
Example #4
def stochastic_gradient_descent(y, tx, initial_w, batch_size, max_iters, gamma, loss_type=Loss.MSE):
    """Stochastic gradient descent algorithm."""
    ws = [initial_w]
    losses = [compute_loss(y, tx, initial_w, loss_type)]
    w = initial_w

    for n_iter in range(max_iters):
        for minibatch_y, minibatch_tx in batch_iter(y, tx, batch_size):
            # stochastic gradient of the loss on the sampled minibatch
            gradient_n = compute_stoch_gradient(minibatch_y, minibatch_tx, w, loss_type)

            w = w - gamma * gradient_n
            loss = compute_loss(y, tx, w, loss_type)

            # store w and loss
            ws.append(w)
            losses.append(loss)
            print("Stochastic Gradient Descent({bi}/{ti}): loss={l}, w0={w0}, w1={w1}".format(
                  bi=n_iter, ti=max_iters - 1, l=loss, w0=w[0], w1=w[1]))

    return losses, ws
Example #5
File: mltb.py Project: lejeunel/pcml_p2
def least_squares_SGD(y, x, gamma, max_iters, B=1, init_guess=None):
    """
    Estimate parameters of linear system using stochastic least squares gradient descent.
    In: x (NxD): Input matrix
        y (Nx1): Output vector
        init_guess (Dx1): Initial guess
        gamma: step_size
        B: batch size
        max_iters: Max number of iterations
    Where N and D are respectively the number of samples and dimension of input vectors
    Out: Estimated parameters
    """

    if init_guess is None:  # `== None` would break for numpy array arguments
        init_guess = np.zeros((x.shape[1], 1))

    N = x.shape[0]
    w = list()
    w.append(init_guess)

    for minibatch_y, minibatch_x in hp.batch_iter(y,
                                                  x,
                                                  B,
                                                  num_batches=max_iters,
                                                  shuffle=True):
        w.append(w[-1] - gamma *
                 comp_ls_gradient(N, minibatch_x, minibatch_y -
                                  np.dot(minibatch_x, w[-1])))

    return w[-1]
def least_squares_SGD(y, tx, initial_w, batch_size, max_iters, gamma):
    """Calculate the least squares solution using stochastic gradient descent."""
    w = initial_w
    for n_iter in range(max_iters):
        for minibatch_y, minibatch_tx in helpers.batch_iter(y, tx, batch_size, 1):
            # the gradient must be computed on the minibatch, not the full data
            grad = compute_gradient(minibatch_y, minibatch_tx, w)
            w = w - gamma * grad
    return (w, compute_mse(y, tx, w))
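The least-squares examples delegate to helpers such as compute_gradient, compute_loss and compute_mse that are not shown. For the MSE loss L(w) = (1/2N) * ||y - tx w||^2 the gradient is -(1/N) * tx^T e with residuals e = y - tx w; a plausible sketch of these helpers, assuming 1-D numpy arrays, is:

import numpy as np

def compute_loss(y, tx, w):
    # MSE loss: (1/2N) * ||y - tx @ w||^2
    e = y - tx.dot(w)
    return e.dot(e) / (2 * len(y))

compute_mse = compute_loss  # some examples use this name for the same quantity

def compute_gradient(y, tx, w):
    # gradient of the MSE loss: -(1/N) * tx^T e
    e = y - tx.dot(w)
    return -tx.T.dot(e) / len(y)

Note that conventions differ: some projects drop the factor 1/2 in the loss, which scales the loss but not the location of its minimum.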
Example #7
def reg_logistic_regression(y, tx, lambda_, initial_w, max_iters, gamma):
    """Regularized logistic regression using stochastic gradient descent (batch size 1)."""
    w = initial_w
    for n_iter in range(max_iters):
        for y_batch, tx_batch in batch_iter(y,tx, batch_size=1, num_batches=1):
            grad = compute_logistic_gradient(y_batch, tx_batch, w, lambda_)
            w = w - gamma * grad
    loss = compute_logistic_loss(y, tx, w, lambda_)
    return w, loss
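The compute_logistic_gradient and compute_logistic_loss helpers used here are also not shown. Assuming labels in {0, 1} and a penalty of lambda_ * ||w||^2 (conventions vary; some projects use lambda_/2 or omit the penalty from the reported loss), one consistent definition would be:

import numpy as np

def sigmoid(t):
    return 1.0 / (1.0 + np.exp(-t))

def compute_logistic_gradient(y, tx, w, lambda_):
    # gradient of the penalized negative log-likelihood (assumed convention)
    return tx.T.dot(sigmoid(tx.dot(w)) - y) + 2 * lambda_ * w

def compute_logistic_loss(y, tx, w, lambda_):
    pred = sigmoid(tx.dot(w))
    nll = -np.sum(y * np.log(pred) + (1 - y) * np.log(1 - pred))
    return nll + lambda_ * np.sum(w ** 2)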
Example #8
def least_squares_SGD(y, tx, initial_w, max_iters, gamma):
    """Linear regression (MSE) using stochastic gradient descent (batch size 1)."""
    w = initial_w
    for n_iter in range(max_iters):
        for y_batch, tx_batch in batch_iter(y,tx, batch_size=1, num_batches=1):
            grad = compute_gradient(y_batch,tx_batch,w)
            w = w - gamma * grad
    loss = compute_mse(y,tx,w)
    return w, loss
Example #9
def model_predictions(model, data, vocab, DEVICE, BATCH_SIZE=16):
    """
    model: an instance of BertSCLSTM
    data: list of tuples, with each tuple consisting of correct and incorrect 
            sentence string (would be split at whitespaces)
    """

    topk = 1
    # print("###############################################")
    inference_st_time = time.time()
    final_sentences = []
    VALID_BATCH_SIZE = BATCH_SIZE
    # print("data size: {}".format(len(data)))
    data_iter = batch_iter(data, batch_size=VALID_BATCH_SIZE, shuffle=False)
    model.eval()
    model.to(DEVICE)
    for batch_id, (batch_labels, batch_sentences) in enumerate(data_iter):
        # set batch data for bert
        batch_labels_, batch_sentences_, batch_bert_inp, batch_bert_splits = bert_tokenize_for_valid_examples(
            batch_labels, batch_sentences)
        if len(batch_labels_) == 0:
            print("################")
            print(
                "Not predicting the following lines due to pre-processing mismatch: \n"
            )
            print([(a, b) for a, b in zip(batch_labels, batch_sentences)])
            print("################")
            continue
        else:
            batch_labels, batch_sentences = batch_labels_, batch_sentences_
        batch_bert_inp = {k: v.to(DEVICE) for k, v in batch_bert_inp.items()}
        # set batch data for others
        batch_labels_ids, batch_lengths = labelize(batch_labels, vocab)
        batch_idxs, batch_lengths_ = sclstm_tokenize(batch_sentences, vocab)
        assert (batch_lengths_ == batch_lengths).all()
        assert len(batch_bert_splits) == len(batch_idxs)
        batch_idxs = [batch_idxs_.to(DEVICE) for batch_idxs_ in batch_idxs]
        batch_lengths = batch_lengths.to(DEVICE)
        batch_labels_ids = batch_labels_ids.to(DEVICE)
        # forward
        with torch.no_grad():
            """
            NEW: batch_predictions can now be of shape (batch_size,batch_max_seq_len,topk) if topk>1, else (batch_size,batch_max_seq_len)
            """
            _, batch_predictions = model(batch_idxs,
                                         batch_lengths,
                                         batch_bert_inp,
                                         batch_bert_splits,
                                         targets=batch_labels_ids,
                                         topk=topk)
        batch_predictions = untokenize_without_unks(batch_predictions,
                                                    batch_lengths, vocab,
                                                    batch_labels)
        final_sentences.extend(batch_predictions)
    # print("total inference time for this data is: {:4f} secs".format(time.time()-inference_st_time))
    return final_sentences
def stochastic_gradient_descent(y, tx, initial_w, batch_size, max_epochs,
                                gamma, compute_stoch_gradient):
    """Stochastic gradient descent algorithm."""
    w = initial_w
    losses = np.zeros(max_epochs)
    ws = np.zeros((max_epochs, w.shape[0]))
    for i in range(max_epochs):
        generator = batch_iter(y, tx, batch_size)
        y_n, tx_n = next(generator)
        g = compute_stoch_gradient(y_n, tx_n, w)
        w = w - gamma * g
        ws[i] = w
        losses[i] = compute_cost(y, tx, w)
    return losses, ws
Example #11
def least_squares_SGD(y, tx, initial_w, max_iters, gamma):
    # initializing the weights, the batch size and the number of batches
    w = initial_w
    batch_size = 1
    num_batches = 1
    for i in range(max_iters):
        # iterating for each batch
        for y_batch, tx_batch in batch_iter(y, tx, batch_size, num_batches):
            # computing the gradient
            gradient = compute_gradient(y_batch, tx_batch, w)
            # updating the weights
            w = w - gamma * gradient
    # return w with the corresponding loss
    return w, compute_loss(y, tx, w)
def least_squares_SGD(y, tx, initial_w, batch_size, max_iters, gamma):
    # ***************************************************
    w = initial_w
    for n_iter in range(max_iters):
        for minibatch_y, minibatch_tx in batch_iter(y,
                                                    tx,
                                                    batch_size,
                                                    num_batches=1):
            gradient = compute_gradient(minibatch_y, minibatch_tx, w)
            # update w by gradient
            w = w - gamma * gradient  # computes the new w(t+1)

    loss = compute_loss(y, tx, w)
    return w, loss
Example #13
def least_squares_SGD(y, tx, initial_w, max_iters, gamma):
    """Stochastic gradient descent algorithm."""

    w = initial_w
    for n_iter in range(max_iters):

        for minibatch_y, minibatch_tx in batch_iter(y, tx, 1):

            # compute gradient and loss
            gradient = compute_gradient(minibatch_y, minibatch_tx, w)

            # update w by gradient
            w = w - gamma * gradient

    return w, compute_loss_MSE(y, tx, w)
Example #14
    def test_batch_iter(self):
        """
        Tests batching function
         
        """
        from helpers import batch_iter
        import scipy.sparse as sp

        A = sp.csr_matrix(
            np.array([[1., 2., 3.], [0., -1., 1.], [3., 4., 5.], [1., 2., 3.],
                      [0., -1., 1.], [3., 4., 5.], [1., 2., 3.], [0., -1., 1.],
                      [3., 4., 5.]]))
        B = list(batch_iter(A, A, 2))

        self.assertEqual(len(B), 5)
Example #15
def least_squares_SGD(y, tx, initial_w, max_iters, gamma):
    """Stochastic gradient descent algorithm."""
    batch_size = 5000
    w = initial_w

    for n_iter in range(max_iters):
        y_, tx_ = next(batch_iter(y, tx, batch_size))
        gradient = compute_gradient(y_, tx_, w)
        w = w - gamma * gradient
        if n_iter % 3 == 0:
            gamma = gamma / 1.2

    loss = compute_loss(y, tx, w)
    #	loss = calculate_nll(y, tx, w)

    return w, loss
def model_predictions(model, data, vocab, DEVICE, BATCH_SIZE=16):
    """
    model: an instance of ElmoSCTransformer
    data: list of tuples, with each tuple consisting of correct and incorrect 
            sentence string (would be split at whitespaces)
    """

    topk = 1
    print("###############################################")
    inference_st_time = time.time()
    final_sentences = []
    VALID_BATCH_SIZE = BATCH_SIZE
    print("data size: {}".format(len(data)))
    data_iter = batch_iter(data, batch_size=VALID_BATCH_SIZE, shuffle=False)
    model.eval()
    model.to(DEVICE)
    for batch_id, (batch_clean_sentences,
                   batch_corrupt_sentences) in enumerate(data_iter):
        # set batch data
        batch_labels, batch_lengths = labelize(batch_clean_sentences, vocab)
        batch_idxs, batch_lengths_, inverted_mask = sctrans_tokenize(
            batch_corrupt_sentences, vocab)
        assert (batch_lengths_ == batch_lengths).all()
        batch_idxs = [batch_idxs_.to(DEVICE) for batch_idxs_ in batch_idxs]
        batch_lengths = batch_lengths.to(DEVICE)
        batch_labels = batch_labels.to(DEVICE)
        inverted_mask = inverted_mask.to(DEVICE)
        batch_elmo_inp = elmo_batch_to_ids(
            [line.split() for line in batch_corrupt_sentences]).to(DEVICE)
        # forward
        with torch.no_grad():
            """
            NEW: batch_predictions can now be of shape (batch_size,batch_max_seq_len,topk) if topk>1, else (batch_size,batch_max_seq_len)
            """
            _, batch_predictions = model(batch_idxs,
                                         inverted_mask,
                                         batch_lengths,
                                         batch_elmo_inp,
                                         targets=batch_labels,
                                         topk=topk)
        batch_predictions = untokenize_without_unks(batch_predictions,
                                                    batch_lengths, vocab,
                                                    batch_clean_sentences)
        final_sentences.extend(batch_predictions)
    print("total inference time for this data is: {:4f} secs".format(
        time.time() - inference_st_time))
    return final_sentences
Example #17
def logistic_regression(y, tx, initial_w, max_iters, gamma):
    """
    @param gamma: step size
    @param max_iters: maximum number of iterations
    @return : optimal weights, minimum mse
    """
    batch_size = 10000
    losses = []
    w = initial_w
    y_batch = np.zeros((batch_size, 1))
    for iter in range(max_iters):
        batch = batch_iter(y, tx, batch_size, num_batches=1, shuffle=True)
        y_batch[:, 0], tx_batch = next(batch)
        loss, w = log_gradient_descent(y_batch, tx_batch, w, gamma)
        losses.append(loss)
        # print("Current iteration={i}, the loss={l}".format(i=iter, l=loss))
    return w, loss
Example #18
def model_predictions(model, data, vocab, DEVICE, BATCH_SIZE=16):
    """
    model: an instance of CharLSTMWordLSTMModel
    data: list of tuples, with each tuple consisting of correct and incorrect 
            sentence string (would be split at whitespaces)
    """

    topk = 1
    print("###############################################")
    inference_st_time = time.time()
    final_sentences = []
    VALID_BATCH_SIZE = BATCH_SIZE
    print("data size: {}".format(len(data)))
    data_iter = batch_iter(data, batch_size=VALID_BATCH_SIZE, shuffle=False)
    model.eval()
    model.to(DEVICE)
    for batch_id, (batch_clean_sentences,
                   batch_corrupt_sentences) in tqdm(enumerate(data_iter)):
        # set batch data
        batch_labels, batch_lengths = labelize(batch_clean_sentences, vocab)
        batch_idxs, batch_lengths_, batch_char_lengths = char_tokenize(
            batch_corrupt_sentences, vocab, return_nchars=True)
        assert (batch_lengths_ == batch_lengths).all()
        batch_idxs = [batch_idxs_.to(DEVICE) for batch_idxs_ in batch_idxs]
        batch_char_lengths = [
            batch_char_lengths_.to(DEVICE)
            for batch_char_lengths_ in batch_char_lengths
        ]
        batch_lengths = batch_lengths.to(DEVICE)
        batch_labels = batch_labels.to(DEVICE)
        # forward
        with torch.no_grad():
            # because topk=1, batch_predictions are of shape (batch_size,batch_max_seq_len)
            _, batch_predictions = model(batch_idxs,
                                         batch_char_lengths,
                                         batch_lengths,
                                         targets=batch_labels,
                                         topk=topk)
        batch_predictions = untokenize_without_unks(batch_predictions,
                                                    batch_lengths, vocab,
                                                    batch_clean_sentences)
        final_sentences.extend(batch_predictions)
    print("total inference time for this data is: {:4f} secs".format(
        time.time() - inference_st_time))
    return final_sentences
def least_squares_SGD(y, x, initial_w, max_iters, gamma, mae=False, threshold=1e-5):
    """
    Implementation of the Stochastic Gradient Descent optimization algorithm for linear regression
    Can be run with both MSE and MAE loss

    :param x: data matrix, numpy ndarray with shape (N, D),
              where N is the number of samples and D is the number of features
    :param y: vector of target values, numpy array with dimensions (N, 1)
    :param initial_w: vector of initial weights, numpy array with dimensions (D, 1)
    :param max_iters: how many iterations to run the algorithm, integer
    :param gamma: learning rate, positive float value
    :param mae: whether to use MAE loss, boolean, optional, the default value is False
    :param threshold: convergence threshold, positive float value

    :returns: (final weights, final loss value), tuple
    """

    # Set the initial values for the weights
    w = initial_w

    # Compute the initial loss value
    prev_loss = compute_loss(y, x, initial_w, mae)

    # Use the helper function batch_iter from Exercise 2,
    # to get a random sample from the data in the form (y_n, x_n) for each iteration
    for n_iter in range(max_iters):
        for y_n, x_n in batch_iter(y, x, batch_size=1, num_batches=1):
            # Compute the gradient for only one sample (or subgradient if MAE loss is used)
            grd = compute_subgradient_mae(y_n, x_n, w) if mae else compute_gradient_mse(y_n, x_n, w)

            # Update the weights using the gradient and learning rate
            w = w - gamma * grd

        # Compute the current loss and test convergence
        loss = compute_loss(y, x, w, mae)
        if abs(loss - prev_loss) < threshold:
            print(f'converged at iter : {n_iter}')
            break
        prev_loss = loss.copy()

    # Compute the final loss value
    loss = compute_loss(y, x, w, mae)

    return w, loss
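The MAE branch above calls compute_subgradient_mae, which is not shown. MAE, L(w) = (1/N) * sum(|y - tx w|), is not differentiable where a residual is exactly zero, so a subgradient is used instead; a minimal sketch under that convention (np.sign returns 0 at zero, which is a valid subgradient there):

import numpy as np

def compute_subgradient_mae(y, tx, w):
    # a subgradient of the MAE loss (1/N) * sum(|y - tx @ w|)
    e = y - tx.dot(w)
    return -tx.T.dot(np.sign(e)) / len(y)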
Example #20
def reg_logistic_regression(y, tx, lambda_, initial_w, max_iters, gamma):
    """
    @param gamma: learning rate
    @param max_iters: maximum number of iterations
    @param batch_size: the size of batches used for calculating the stochastic gradient
    @return : optimal weights, minimum loss
    """
    batch_size = y.shape[0] // 10  # integer division: np.zeros below needs an int
    losses = []
    w = np.zeros((tx.shape[1], 1))
    y_batch = np.zeros((batch_size, 1))
    for iter in range(max_iters):
        batch = batch_iter(y, tx, batch_size, num_batches=1, shuffle=True)
        y_batch[:, 0], tx_batch = next(batch)
        loss, w = reg_log_gradient_descent(y_batch, tx_batch, w, gamma,
                                           lambda_)
        losses.append(loss)
        # print("Current iteration={i}, the loss={l}".format(i=iter, l=loss))
    return w, loss
def stochastic_gradient_descent(y, tx, initial_w, batch_size, max_iters,
                                gamma):
    """Stochastic gradient descent algorithm."""
    ws = [initial_w]
    losses = [compute_loss(y, tx, initial_w)]
    w = initial_w
    for y_batch, tx_batch in batch_iter(y, tx, batch_size, max_iters):
        # compute the stochastic gradient on the minibatch, not the full data
        gradient = compute_gradient(y_batch, tx_batch, w)

        w = w - gamma * gradient
        loss = compute_loss(y, tx, w)

        # store w and loss
        ws.append(w)
        losses.append(loss)
        print("Stochastic Gradient Descent: loss={l}, w0={w0}, w1={w1}".format(
            l=loss, w0=w[0], w1=w[1]))

    return losses, ws
Example #22
def running_gradient(y, tx, w, lambda_, method='penalized'):
    """
    run gradient descent, using logistic regression,
    penalized log regression or newton method.
    Return the loss and final weights.
    """
    max_iter = 5000
    gamma = 0.01

    threshold = 1e-8

    losses = []
    batch_size = 5000
    n_iter = 0
    # start gradient descent
    for minibatch_y, minibatch_tx in batch_iter(y,
                                                tx,
                                                batch_size,
                                                num_batches=max_iter):
        # get loss and update w.
        if method == 'penalized':
            loss, w = learning_by_penalized_gradient(minibatch_y, minibatch_tx,
                                                     w, gamma, lambda_)
        elif method == 'newton':
            loss, w = learning_by_newton_method(minibatch_y, minibatch_tx, w,
                                                gamma)
        elif method == 'gradient':
            loss, w = learning_by_gradient_descent(minibatch_y, minibatch_tx,
                                                   w, gamma)
        # log info
        if n_iter % 10 == 0:
            #print(w)
            print("Current iteration={i}, loss={l}".format(i=n_iter, l=loss))
        # converge criterion
        #if len(losses) == 1000:
        #   break
        losses.append(loss)
        if len(losses) > 1 and np.abs(losses[-1] - losses[-2]) < threshold:
            break
        n_iter += 1
    return w
Example #23
def least_squares_SGD(y, tx, initial_w, max_iters, gamma):

    batch_size = 1

    # Define parameters to store w and loss
    loss = 0
    w = initial_w

    for n_iter, [minib_y, minib_tx
                 ] in enumerate(batch_iter(y, tx, batch_size, max_iters)):

        grad = compute_gradient(minib_y, minib_tx, w)
        w = w - gamma * grad
        # loss of the updated weights, so the returned (w, loss) pair matches
        loss = compute_mse(y, tx, w)

        if n_iter % 100 == 0:
            print("Current iteration={i}, loss={l}".format(i=n_iter, l=loss))

    return w, loss
Example #24
def least_squares_SGD(y, tx, initial_w, max_iters, gamma):
    """Stochastic gradient descent algorithm."""
    if len(initial_w.shape) == 2:
        initial_w = initial_w.reshape((max(initial_w.shape)))
    if len(y.shape) == 2:
        y = y.reshape((max(y.shape)))

    batch_size = 5000
    w = initial_w

    for n_iter in range(max_iters):
        y_, tx_ = next(batch_iter(y, tx, batch_size))
        gradient = compute_gradient(y_, tx_, w)
        w = w - gamma * gradient
        if n_iter % 3 == 0:
            gamma = gamma / 1.2

    loss = compute_loss(y, tx, w)

    return w, loss
def stochastic_gradient_descent(y, tx, initial_w, batch_size, max_epochs,
                                gamma):
    ws = [initial_w]
    losses = []
    w = initial_w
    n_iter = 0
    for minibatch_y, minibatch_tx in batch_iter(y, tx, batch_size, max_epochs):

        grad = compute_stoch_gradient(minibatch_y, minibatch_tx, w)
        w = w - gamma * grad
        # loss of the updated weights, so ws and losses stay aligned
        loss = co.compute_loss(y, tx, w)

        # store w and loss
        ws.append(np.copy(w))
        losses.append(loss)

        n_iter += 1
    print("Gradient Descent({bi}/{ti}): loss={l}".format(bi=max_epochs - 1,
                                                         ti=max_epochs - 1,
                                                         l=loss))
    return losses, ws
def stochastic_gradient_descent(y, tx, initial_w, batch_size, max_iters,
                                gamma):
    """Stochastic gradient descent algorithm."""
    # Define parameters to store w and loss
    ws = [initial_w]
    losses = []
    w = initial_w
    for n_iter in range(max_iters):
        # get a random minibatch of data
        for minibatch_y, minibatch_x in batch_iter(y, tx, batch_size):
            grad = compute_stoch_gradient(minibatch_y, minibatch_x, w)
            w = w - gamma * grad
            # minibatch loss of the updated weights
            loss = compute_loss(minibatch_y, minibatch_x, w)
            # store w and loss
            ws.append(w)
            losses.append(loss)
        print(
            "Stochastic Gradient Descent({bi}/{ti}): loss={l}, w0={w0}, w1={w1}"
            .format(bi=n_iter, ti=max_iters - 1, l=loss, w0=w[0], w1=w[1]))

    return losses, ws
Example #27
def stochastic_gradient_descent(
        y, tx, initial_w, batch_size, max_iters, gamma, loss_type):
    """Stochastic gradient descent algorithm."""
    ws = [initial_w]
    losses = []
    w = initial_w
    
    for n_iter in range(max_iters):
        for minibatch_y, minibatch_tx in batch_iter(y, tx, batch_size):
            grad = compute_stoch_gradient(minibatch_y, minibatch_tx, w, loss_type)
            # Update
            w = w - gamma * grad
            # Store the loss of the updated weights, keeping ws and losses aligned
            loss = compute_loss(y, tx, w, loss_type)
            ws.append(w)
            losses.append(loss)
            
        print("Stochastic Gradient Descent({bi}/{ti}): loss={l}, w0={w0}, w1={w1}".format(
              bi=n_iter, ti=max_iters - 1, l=loss, w0=w[0], w1=w[1]))
        
    return losses, ws
def stochastic_subgradient_descent(y,
                                   tx,
                                   initial_w,
                                   batch_size,
                                   max_iters,
                                   gamma,
                                   ltype="MAE"):
    """Stochastic gradient descent algorithm."""
    w = initial_w
    g = 0
    num_batches = 1
    for n_iter in range(max_iters):
        for minibatch_y, minibatch_tx in batch_iter(y, tx, batch_size,
                                                    num_batches):
            g = compute_subgradient(minibatch_y, minibatch_tx, w)
            # update w by gradient
            w = w - gamma * g  # computes the new w(t+1)
    loss = compute_loss_subgradient(y, tx, w)  # compute final error

    return w, loss
def stochastic_gradient_descent(
        y, tx, initial_w, batch_size, max_iters, gamma):
    """Stochastic gradient descent."""
    # Define parameters to store w and loss
    ws = [initial_w]
    losses = []
    w = initial_w

    for n_iter in range(max_iters):
        for y_batch, tx_batch in batch_iter(y, tx, batch_size=batch_size, num_batches=1):
            # compute a stochastic gradient and loss
            grad, _ = compute_stoch_gradient(y_batch, tx_batch, w)
            # update w through the stochastic gradient update
            w = w - gamma * grad
            # calculate loss
            loss = compute_loss(y, tx, w)
            # store w and loss
            ws.append(w)
            losses.append(loss)

        print("SGD({bi}/{ti}): loss={l}, w0={w0}, w1={w1}".format(
              bi=n_iter, ti=max_iters - 1, l=loss, w0=w[0], w1=w[1]))
    return losses, ws
Example #30
def least_squares_sgd(y, tx, initial_w, max_iters, gamma):
    """ Linear regression using stochastic gradient descent
    """
    # if initial_w is None, we initialize it to a zeros vector
    if (initial_w is None):
        initial_w = np.zeros(tx.shape[1])

    # Define parameters of the algorithm
    batch_size = 1

    # Define parameters to store w and loss
    loss = 0
    w = initial_w

    for n_iter, [mb_y,
                 mb_tx] in enumerate(batch_iter(y, tx, batch_size, max_iters)):
        # compute the gradient on the minibatch
        gradient = compute_gradient(mb_y, mb_tx, w)

        # update w by the gradient; avoid `w -= ...`, which would
        # mutate the caller's initial_w array in place
        w = w - gamma * gradient

        # loss of the updated weights on the full data
        loss = compute_loss(y, tx, w)

    return w, loss
Example #31
def least_squares_SGD(y, tx, initial_w, max_iters, gamma):
    """
    Linear regression using stochastic gradient descent
    @param gamma: step size
    @param max_iters: maximum number of iterations
    @param batch_size: the size of batches used for calculating the stochastic gradient
    @return: optimal weights, minimum mse
    """
    batch_size = 5000
    ws = [initial_w]
    losses = []
    w = initial_w
    for i in range(max_iters):
        for minibatch_y, minibatch_tx in batch_iter(y, tx, batch_size):
            stoch_gradient = compute_gradient(minibatch_y, minibatch_tx, w)
            w = w - gamma * stoch_gradient
            # compute the loss of the updated weights so ws and losses stay aligned
            loss = compute_loss(y, tx, w)
            # store w and loss
            ws.append(np.copy(w))
            losses.append(loss)
            #print("SGD ({bi}/{ti}): loss={l}".format(bi=i, ti=max_iters - 1, l=loss))
    min_loss = min(losses)
    w = ws[losses.index(min_loss)]
    return w, min_loss