Пример #1
0
    def __init__(self,
                 loss,
                 regularization=None,
                 learning_rate=0.01,
                 reg_param=0.05):
        """
        Store the step size and build the loss object named by `loss`,
        handing it the regularizer named by `regularization`.

        Arguments:
            loss - (string) Either 'hinge' or 'squared'.
            regularization - (string or None) One of 'l1', 'l2', or None.
            learning_rate - (float) Stored on self.learning_rate.
            reg_param - (float) Passed to the regularizer constructor.
        Raises:
            ValueError - if `regularization` or `loss` is not one of the
                names listed above.
        """
        self.learning_rate = learning_rate

        # Resolve the regularizer first so that an unknown name is reported
        # before any loss object is constructed.
        if regularization is None:
            penalty = None
        elif regularization == 'l1':
            penalty = L1Regularization(reg_param)  #?
        elif regularization == 'l2':
            penalty = L2Regularization(reg_param)
        else:
            message = 'Regularizer {} is not defined'.format(regularization)
            raise ValueError(message)

        # Pick the loss class, then instantiate it with the regularizer.
        if loss == 'hinge':
            loss_class = HingeLoss
        elif loss == 'squared':
            loss_class = SquaredLoss
        else:
            raise ValueError('Loss function {} is not defined'.format(loss))
        self.loss = loss_class(penalty)

        # No parameters exist until the model has been fitted.
        self.model = None
Пример #2
0
def test_squared_loss_forward():
    """
    Tests the forward pass of the squared loss function
    """
    from your_code import SquaredLoss
    X = np.array([[-1, 2, 1], [-3, 4, 1]])
    w = np.array([1, 2, 3])
    y = np.array([1, -1])

    loss = SquaredLoss(regularization=None)

    # X.dot(w) is [6, 8], so the residuals against y are [5, 9] and
    # (5**2 + 9**2) / (2 * 2) = 26.5. This presumes SquaredLoss is the
    # 1/(2N)-scaled sum of squared residuals -- confirm against your_code.
    _true = 26.5
    _est = loss.forward(X, w, y)

    # Without this check the test could never fail.
    assert np.isclose(_true, _est)
Пример #3
0
def test_squared_loss_backward():
    """
    Checks the gradient returned by SquaredLoss.backward on two examples
    """
    from your_code import SquaredLoss

    # Two examples with three columns each, plus their labels.
    features = np.array([[-1, 2, 1], [-3, 4, 1]])
    weights = np.array([1, 2, 3])
    labels = np.array([1, -1])
    expected = np.array([-16, 23, 7])

    squared = SquaredLoss(regularization=None)
    actual = squared.backward(features, weights, labels)

    assert np.allclose(expected, actual)
Пример #4
0
class GradientDescent:
    """
    Linear classifier trained with (mini-batch) gradient descent.

    The model is a weight vector whose last entry is the bias: fit appends a
    column of ones to the feature matrix before learning. Raw outputs below
    zero are labelled -1 and all other outputs are labelled +1.

    Arguments:
        loss - (string) The loss function to use. Either 'hinge' or 'squared'.
        regularization - (string or None) One of 'l1', 'l2', or None.
        learning_rate - (float) The size of each gradient descent update step.
        reg_param - (float) Value passed to the regularizer constructor.
    """
    def __init__(self, loss, regularization=None,
                 learning_rate=0.01, reg_param=0.05):
        self.learning_rate = learning_rate

        # Select regularizer
        if regularization == 'l1':
            regularizer = L1Regularization(reg_param)
        elif regularization == 'l2':
            regularizer = L2Regularization(reg_param)
        elif regularization is None:
            regularizer = None
        else:
            raise ValueError(
                'Regularizer {} is not defined'.format(regularization))

        # Select loss function
        if loss == 'hinge':
            self.loss = HingeLoss(regularizer)
        elif loss == 'squared':
            self.loss = SquaredLoss(regularizer)
        else:
            raise ValueError('Loss function {} is not defined'.format(loss))

        self.model = None

    def fit(self, features, targets, batch_size=None, max_iter=1000):
        """
        Learns self.model by gradient descent on self.loss.

        Each pass visits every example once. With a batch size the examples
        are reshuffled every pass and consumed in consecutive batches (the
        last batch may be smaller); without one, a pass is a single update
        on all examples. Training stops after max_iter passes, or earlier
        once the loss over all examples stops changing between passes.

        Arguments:
            features - (np.array) An Nxd array of features.
            targets - (np.array) A 1D array of targets of length N.
            batch_size - (int or None) Examples per update. None (or a value
                larger than N) means all examples are used in each update.
            max_iter - (int) The maximum number of passes over the data.
        Modifies:
            self.model - (np.array) A 1D array of length d+1; the last entry
                is the bias.
        """
        features = np.asarray(features)
        targets = np.asarray(targets)
        n_examples = features.shape[0]
        features = np.append(features, np.ones((n_examples, 1)), axis=1)

        self.model = np.random.uniform(-0.1, 0.1, features.shape[1])

        # A missing or oversized batch size means "one batch with everything".
        if not batch_size or batch_size > n_examples:
            batch_size = max(n_examples, 1)

        previous_loss = self.loss.forward(features, self.model, targets)
        for _ in range(max_iter):
            if batch_size < n_examples:
                # np.random.shuffle works in place and returns None, so draw
                # the visiting order with permutation instead.
                order = np.random.permutation(n_examples)
            else:
                order = np.arange(n_examples)

            for start in range(0, n_examples, batch_size):
                rows = order[start:start + batch_size]
                gradient = self.loss.backward(
                    features[rows], self.model, targets[rows])
                self.model = self.model - self.learning_rate * gradient

            # Convergence is always judged on the full data set.
            current_loss = self.loss.forward(features, self.model, targets)
            if not abs(current_loss - previous_loss) > 0:
                break
            previous_loss = current_loss

    def predict(self, features):
        """
        Predicts a label for every row of features.

        Arguments:
            features - (np.array) An Nxd array of features.
        Returns:
            predictions - (np.array) A 1D integer array of length N holding
                -1 where the confidence is negative and +1 elsewhere.
        """
        return np.where(self.confidence(features) < 0, -1, 1)

    def confidence(self, features):
        """
        Returns the raw model output w^T x + b for every row of features.

        Arguments:
            features - (np.array) Either an Nxd array of raw features or an
                Nx(d+1) array that already ends with the column of ones;
                the bias column is appended only when it is missing.
        Returns:
            confidence - (np.array) A 1D array of length N.
        """
        return self._with_bias(features).dot(self.model)

    def _with_bias(self, features):
        """Returns features as a 2D float array ending with a ones column."""
        rows = np.atleast_2d(np.asarray(features, dtype=float))
        # One column short of the model length means the bias is missing.
        if rows.shape[1] + 1 == self.model.shape[0]:
            rows = np.hstack((rows, np.ones((rows.shape[0], 1))))
        return rows
Пример #5
0
class GradientDescent:
    """
    This is a linear classifier similar to the one you implemented in the
    linear regressor homework. This is the classification via regression
    case. The goal here is to learn some hyperplane, y = w^T x + b, such that
    when features, x, are processed by our model (w and b), the result is
    some value y. If y is in [0.0, +inf), the predicted classification label
    is +1 and if y is in (-inf, 0.0) the predicted classification label is
    -1.

    The catch here is that we will not be using the closed form solution,
    rather, we will be using gradient descent. In your fit function you
    will determine a loss and update your model (w and b) using gradient
    descent. More details below.

    Arguments:
        loss - (string) The loss function to use. Either 'hinge' or 'squared'.
        regularization - (string or None) The type of regularization to use.
            One of 'l1', 'l2', or None. See regularization.py for more details.
        learning_rate - (float) The size of each gradient descent update step.
        reg_param - (float) The hyperparameter that controls the amount of
            regularization to perform. Must be non-negative.
    """
    def __init__(self,
                 loss,
                 regularization=None,
                 learning_rate=0.01,
                 reg_param=0.05):
        self.learning_rate = learning_rate

        # Select regularizer
        if regularization == 'l1':
            regularizer = L1Regularization(reg_param)
        elif regularization == 'l2':
            regularizer = L2Regularization(reg_param)
        elif regularization is None:
            regularizer = None
        else:
            raise ValueError(
                'Regularizer {} is not defined'.format(regularization))

        # Select loss function
        if loss == 'hinge':
            self.loss = HingeLoss(regularizer)
        elif loss == 'squared':
            self.loss = SquaredLoss(regularizer)
        else:
            raise ValueError('Loss function {} is not defined'.format(loss))

        self.model = None

    def fit(self, features, targets, batch_size=None, max_iter=1000):
        """
        Fits a gradient descent learner to the features and targets:
          - Initialize the model parameters to uniform random values in the
            interval [-0.1, +0.1].
          - Until converged or max_iter updates have been made:
            - Draw a batch of distinct examples at random from ALL examples.
            - Move the parameters against the gradient of the loss on that
              batch, using the learning rate as the step size.
        Convergence is judged on the loss over all examples: once an update
        changes that loss by less than 1e-4 the model is considered converged.

        A bias term is included by APPENDING a column of 1s to the feature
        matrix. The bias term is then the last value in self.model.

        Arguments:
            features - (np.array) An Nxd array of features, where N is the
                number of examples and d is the number of features.
            targets - (np.array) A 1D array of targets of length N.
            batch_size - (int or None) The number of examples used in each
                update. None (or a value larger than N) uses all examples.
            max_iter - (int) The maximum number of updates to perform.
        Modifies:
            self.model - (np.array) A 1D array of model parameters of length
                d+1. The +1 refers to the bias term.
        Returns:
            The number of updates that were performed.
        """
        features = np.asarray(features)
        targets = np.asarray(targets)
        n_examples = features.shape[0]
        X = np.hstack((features, np.ones((n_examples, 1))))  # [x1, x2, ..., 1]

        w = np.random.uniform(-0.1, 0.1, X.shape[1])

        if not batch_size or batch_size > n_examples:
            batch_size = n_examples

        prev_loss = self.loss.forward(X=X, w=w, y=targets)
        n_updates = 0
        while n_updates < max_iter:
            # Sample row indices from the whole data set; np.arange(batch_size)
            # would only ever revisit the first batch_size rows.
            batch = np.random.choice(n_examples, batch_size, replace=False)
            gradient = self.loss.backward(X=X[batch], w=w, y=targets[batch])
            w = w - self.learning_rate * gradient
            n_updates += 1

            # Batch losses are not comparable between different batches, so
            # the stopping rule looks at the loss over every example.
            loss = self.loss.forward(X=X, w=w, y=targets)
            if abs(loss - prev_loss) < 1e-4:
                break
            prev_loss = loss

        self.model = w

        return n_updates

    def predict(self, features):
        """
        Predicts the class labels of each example in features. Model output
        values at and above 0 are predicted to have label +1. Negative
        output values are predicted to have label -1.

        Arguments:
            features - (np.array) A Nxd array of features, where N is the
                number of examples and d is the number of features.
        Returns:
            predictions - (np.array) A 1D float array of predictions of
                length N, where index i holds the prediction for row i of
                features.
        """
        # np.sign would yield 0 for an output of exactly 0, which is not a
        # valid label; threshold explicitly instead.
        return np.where(self.confidence(features) >= 0, 1.0, -1.0)

    def confidence(self, features):
        """
        Returns the raw model output of the prediction. In other words, rather
        than predicting +1 for values at or above 0 and -1 for other values,
        this function returns the original, unquantized value.

        Arguments:
            features - (np.array) A Nxd array of features, where N is the
                number of examples and d is the number of features.
        Returns:
            confidence - (np.array) A 1D array of confidence values of length
                N, where index i holds the confidence for row i of features.
        """
        features = np.asarray(features)
        n_examples = features.shape[0]
        X = np.hstack((features, np.ones((n_examples, 1))))  # [x1, x2, ..., 1]

        return self.model.dot(X.T)
class GradientDescent:
    """
    This is a linear classifier similar to the one you implemented in the
    linear regressor homework. This is the classification via regression
    case. The goal here is to learn some hyperplane, y = w^T x + b, such that
    when features, x, are processed by our model (w and b), the result is
    some value y. If y is in [0.0, +inf), the predicted classification label
    is +1 and if y is in (-inf, 0.0) the predicted classification label is
    -1.

    The catch here is that we will not be using the closed form solution,
    rather, we will be using gradient descent. In your fit function you
    will determine a loss and update your model (w and b) using gradient
    descent. More details below.

    Arguments:
        loss - (string) The loss function to use. Either 'hinge' or 'squared'.
        regularization - (string or None) The type of regularization to use.
            One of 'l1', 'l2', or None. See regularization.py for more details.
        learning_rate - (float) The size of each gradient descent update step.
        reg_param - (float) The hyperparameter that controls the amount of
            regularization to perform. Must be non-negative.
    """
    def __init__(self, loss, regularization=None,
                 learning_rate=0.01, reg_param=0.05):
        self.learning_rate = learning_rate

        # Select regularizer
        if regularization == 'l1':
            regularizer = L1Regularization(reg_param)
        elif regularization == 'l2':
            regularizer = L2Regularization(reg_param)
        elif regularization is None:
            regularizer = None
        else:
            raise ValueError(
                'Regularizer {} is not defined'.format(regularization))

        # Select loss function
        if loss == 'hinge':
            self.loss = HingeLoss(regularizer)
        elif loss == 'squared':
            self.loss = SquaredLoss(regularizer)
        else:
            raise ValueError('Loss function {} is not defined'.format(loss))

        self.model = None

    def fit(self, features, targets, batch_size=None, max_iter=1000):
        """
        Fits a gradient descent learner to the features and targets:
          - Initialize all model parameters, bias included, to uniform random
            values in the interval [-0.1, +0.1].
          - Until converged or max_iter updates have been made:
            - Draw a batch of distinct examples (features together with
              their targets) at random.
            - Move the parameters against the gradient of the loss on that
              batch, using the learning rate as the step size.
        Convergence is judged on the loss over all examples: once an update
        changes that loss by no more than 1e-4 the model is considered
        converged.

        A bias term is included by APPENDING a column of 1s to the feature
        matrix. The bias term is then the last value in self.model.

        Arguments:
            features - (np.array) An Nxd array of features, where N is the
                number of examples and d is the number of features.
            targets - (np.array) A 1D array of targets of length N.
            batch_size - (int or None) The number of examples used in each
                update. None (or a value larger than N) uses all examples.
            max_iter - (int) The maximum number of updates to perform.
        Modifies:
            self.model - (np.array) A 1D array of model parameters of length
                d+1. The +1 refers to the bias term.
        """
        features = np.asarray(features)
        targets = np.asarray(targets)
        n_examples = features.shape[0]
        features = np.append(features, np.ones((n_examples, 1)), axis=1)

        weights = np.random.uniform(-.1, .1, features.shape[1])

        if not batch_size or batch_size > n_examples:
            batch_size = n_examples

        loss = self.loss.forward(features, weights, targets)
        # A plain bounded loop guarantees termination after max_iter updates
        # even when the loss oscillates or diverges.
        for _ in range(max_iter):
            # Index features and targets with the same rows so that every
            # example stays paired with its own label.
            rows = np.random.choice(n_examples, batch_size, replace=False)
            gradient = self.loss.backward(
                features[rows], weights, targets[rows])
            weights = weights - (self.learning_rate * gradient)

            new_loss = self.loss.forward(features, weights, targets)
            converged = not abs(loss - new_loss) > .0001
            loss = new_loss
            if converged:
                break

        self.model = weights

    def predict(self, features):
        """
        Predicts the class labels of each example in features. Model output
        values at and above 0 are predicted to have label +1. Negative
        output values are predicted to have label -1.

        Arguments:
            features - (np.array) A Nxd array of features, where N is the
                number of examples and d is the number of features.
        Returns:
            predictions - (np.array) A 1D float array of predictions of
                length N, where index i holds the prediction for row i of
                features.
        """
        # np.sign would yield 0 for an output of exactly 0, which is not a
        # valid label; threshold explicitly instead.
        return np.where(self.confidence(features) >= 0, 1.0, -1.0)

    def confidence(self, features):
        """
        Returns the raw model output of the prediction. In other words, rather
        than predicting +1 for values at or above 0 and -1 for other values,
        this function returns the original, unquantized value.

        Arguments:
            features - (np.array) Either an Nxd array of raw features or an
                Nx(d+1) array that already ends with the column of ones;
                the bias column is appended only when it is missing.
        Returns:
            confidence - (np.array) A 1D array of confidence values of length
                N, where index i holds the confidence for row i of features.
        """
        return self._with_bias(features).dot(self.model)

    def _with_bias(self, features):
        """Returns features as a 2D float array ending with a ones column."""
        rows = np.atleast_2d(np.asarray(features, dtype=float))
        # One column short of the model length means the bias is missing.
        if rows.shape[1] + 1 == self.model.shape[0]:
            rows = np.hstack((rows, np.ones((rows.shape[0], 1))))
        return rows
class GradientDescentQ1:
    """
    This is a linear classifier similar to the one you implemented in the
    linear regressor homework. This is the classification via regression
    case. The goal here is to learn some hyperplane, y = w^T x + b, such that
    when features, x, are processed by our model (w and b), the result is
    some value y. If y is in [0.0, +inf), the predicted classification label
    is +1 and if y is in (-inf, 0.0) the predicted classification label is
    -1.

    In addition to training, fit records the loss and accuracy after every
    update and plots both against the iteration number.

    Arguments:
        loss - (string) The loss function to use. Either 'hinge' or 'squared'.
        regularization - (string or None) The type of regularization to use.
            One of 'l1', 'l2', or None. See regularization.py for more details.
        learning_rate - (float) The size of each gradient descent update step.
        reg_param - (float) The hyperparameter that controls the amount of
            regularization to perform. Must be non-negative.
        question - (string) Selects where the plot is saved: '1a' writes
            Q1a.png and '1b' writes Q1b.png; other values save nothing.
    """
    def __init__(self,
                 loss,
                 regularization=None,
                 learning_rate=0.01,
                 reg_param=0.05,
                 question='1a'):
        self.learning_rate = learning_rate

        # Select regularizer
        if regularization == 'l1':
            regularizer = L1Regularization(reg_param)
        elif regularization == 'l2':
            regularizer = L2Regularization(reg_param)
        elif regularization is None:
            regularizer = None
        else:
            raise ValueError(
                'Regularizer {} is not defined'.format(regularization))

        # Select loss function
        if loss == 'hinge':
            self.loss = HingeLoss(regularizer)
        elif loss == 'squared':
            self.loss = SquaredLoss(regularizer)
        else:
            raise ValueError('Loss function {} is not defined'.format(loss))

        self.model = None
        self.question = question

    def fit(self, features, targets, batch_size=None, max_iter=1000):
        """
        Fits a gradient descent learner to the features and targets:
          - Initialize the model parameters to uniform random values in the
            interval [-0.1, +0.1].
          - Until converged or max_iter updates have been made:
            - Draw a batch of distinct examples (features together with
              their targets) at random, or use everything when batch_size
              is None.
            - Move the parameters against the gradient of the loss on that
              batch, using the learning rate as the step size.
        Convergence is judged on the loss over all examples: once an update
        changes that loss by less than 1e-4 the model is considered converged.
        The loss and accuracy recorded after each update are plotted once
        training ends.

        A bias term is included by APPENDING a column of 1s to the feature
        matrix. The bias term is then the last value in self.model.

        Arguments:
            features - (np.array) An Nxd array of features, where N is the
                number of examples and d is the number of features.
            targets - (np.array) A 1D array of targets of length N.
            batch_size - (int or None) The number of examples used in each
                iteration. If None, use all of the examples in each update.
                Values larger than N are treated as N.
            max_iter - (int) The maximum number of updates to perform.
        Modifies:
            self.model - (np.array) A 1D array of model parameters of length
                d+1. The +1 refers to the bias term.
        """
        features = np.asarray(features)
        targets = np.asarray(targets)
        n_examples = features.shape[0]
        design = np.hstack((features, np.ones((n_examples, 1))))

        self.model = np.array(
            [random.uniform(-0.1, 0.1) for _ in range(design.shape[1])])

        accuracy_list = []
        loss_list = []
        iteration_list = []
        iteration = 0
        loss = None
        while iteration < max_iter:
            if batch_size is None:
                sample_features = design
                sample_targets = targets
            else:
                # Sample row indices once and use them for both arrays so
                # every example keeps its own label; random.sample cannot be
                # applied to the arrays themselves.
                rows = random.sample(range(n_examples),
                                     min(batch_size, n_examples))
                sample_features = design[rows]
                sample_targets = targets[rows]

            self.model = self.model - self.learning_rate * \
                self.loss.backward(sample_features, self.model, sample_targets)

            # The stopping rule compares losses over all examples; losses of
            # different random batches are not comparable.
            new_loss = self.loss.forward(design, self.model, targets)
            if loss is not None and abs(new_loss - loss) < 1e-4:
                break
            loss = new_loss
            loss_list.append(loss)
            accuracy = metrics.accuracy(targets, self.predict(features))
            accuracy_list.append(accuracy)
            iteration_list.append(iteration)
            iteration += 1

        self._plot_history(iteration_list, loss_list, accuracy_list)

    def _plot_history(self, iteration_list, loss_list, accuracy_list):
        """Plots loss and accuracy per iteration and saves it per question."""
        plt.figure()
        plt.plot(iteration_list, loss_list, color='orange', label='Loss')
        plt.plot(iteration_list, accuracy_list, color='blue', label='Accuracy')
        plt.title('Loss & Accuracy Vs. Iteration No.')
        plt.xlabel('Iteration')
        plt.ylabel('Loss & Accuracy')
        plt.legend(loc="best")
        if self.question == '1a':
            plt.savefig("Q1a.png")
        if self.question == '1b':
            plt.savefig("Q1b.png")

    def predict(self, features):
        """
        Predicts the class labels of each example in features. Model output
        values at and above 0 are predicted to have label +1. Negative
        output values are predicted to have label -1.

        Arguments:
            features - (np.array) A Nxd array of features, where N is the
                number of examples and d is the number of features.
        Returns:
            predictions - (np.array) A 1D float array of predictions of
                length N, where index i holds the prediction for row i of
                features.
        """
        # np.sign would yield 0 for an output of exactly 0, which is not a
        # valid label; threshold explicitly instead.
        return np.where(self.confidence(features) >= 0, 1.0, -1.0)

    def confidence(self, features):
        """
        Returns the raw model output of the prediction. In other words, rather
        than predicting +1 for values at or above 0 and -1 for other values,
        this function returns the original, unquantized value.

        Arguments:
            features - (np.array) A Nxd array of features. The column of
                ones for the bias is appended when features has one column
                fewer than self.model has entries; an array that already
                has as many columns as self.model is used unchanged.
        Returns:
            confidence - (np.array) A 1D array of confidence values of length
                N, where index i holds the confidence for row i of features.
        """
        rows = np.atleast_2d(np.asarray(features, dtype=float))
        if rows.shape[1] + 1 == self.model.shape[0]:
            rows = np.hstack((rows, np.ones((rows.shape[0], 1))))
        return rows.dot(self.model)