def test_tensor(self):
        """Flattening a loader over single-tensor samples yields one tensor."""
        batches = DataLoader(TensorDataset(), batch_size=256)

        # flatten() must return exactly one item here, hence the 1-tuple unpack.
        (flat,) = dl_utils.flatten(batches)

        assert torch.is_tensor(flat)

        expected_shape = torch.Size([DATASET_SIZE, DATA_SIZE, DATA_SIZE])
        assert flat.shape == expected_shape
def find_best_k(ds_train: Dataset, k_choices, num_folds):
    """
    Use cross validation to find the best K for the kNN model.

    The dataset is cut into ``num_folds`` contiguous, non-overlapping folds.
    For every candidate k a fresh classifier is trained ``num_folds`` times,
    each time holding out a different fold for validation and training on
    all the remaining samples.

    :param ds_train: Training dataset.
    :param k_choices: A sequence of possible value of k for the kNN model.
    :param num_folds: Number of folds for cross-validation.
    :return: tuple (best_k, accuracies) where:
        best_k: the value of k with the highest mean accuracy across folds
        accuracies: The accuracies per fold for each k (list of lists).
    :raises ValueError: if ``k_choices`` is empty, or if ``num_folds`` is
        smaller than 2 or larger than the number of samples (some fold would
        then have no training or no validation data).
    """
    # Function-scope import so the block does not depend on which names the
    # surrounding module happens to import from torch.
    from torch.utils.data import DataLoader, Subset

    n_samples = len(ds_train)
    if len(k_choices) == 0:
        raise ValueError("k_choices must contain at least one value")
    if not 2 <= num_folds <= n_samples:
        raise ValueError(
            f"num_folds must be between 2 and the dataset size "
            f"({n_samples}), got {num_folds}")

    batch_size = 1024

    # The fold boundaries do not depend on k, so compute them once.
    # Fold j covers indices [n*j // folds, n*(j+1) // folds); together the
    # folds cover every sample exactly once.
    all_indices = list(range(n_samples))
    folds = []
    for j in range(num_folds):
        start = n_samples * j // num_folds
        stop = n_samples * (j + 1) // num_folds
        train_indices = all_indices[:start] + all_indices[stop:]
        valid_indices = all_indices[start:stop]
        folds.append((train_indices, valid_indices))

    accuracies = []

    for k in k_choices:
        accuracies_for_k = []

        for train_indices, valid_indices in folds:
            dl_fold_train = DataLoader(
                Subset(ds_train, train_indices), batch_size=batch_size)
            dl_fold_valid = DataLoader(
                Subset(ds_train, valid_indices), batch_size=batch_size)

            # A new classifier per fold, so no state leaks between folds.
            knn_classifier = KNNClassifier(k=k)
            knn_classifier.train(dl_fold_train)

            x_valid, y_valid = dataloader_utils.flatten(dl_fold_valid)
            y_pred = knn_classifier.predict(x_valid)

            # Record this fold's validation accuracy.
            accuracies_for_k.append(accuracy(y_valid, y_pred))

        accuracies.append(accuracies_for_k)

    best_k_idx = np.argmax([np.mean(acc) for acc in accuracies])
    best_k = k_choices[best_k_idx]

    return best_k, accuracies
    def test_two_tuple(self):
        """Flattening a loader over 2-tuple samples yields two tensors."""
        batches = DataLoader(TensorTwoTupleDataset(), batch_size=256)
        first, second = dl_utils.flatten(batches)

        # Both flattened items must be tensors.
        for flat in (first, second):
            assert torch.is_tensor(flat)

        # Each item keeps its per-sample shape behind a leading dataset axis.
        assert first.shape == torch.Size([DATASET_SIZE, DATA_SIZE, DATA_SIZE])
        assert second.shape == torch.Size([DATASET_SIZE, DATA_SIZE, 1])
    def test_three_tuple(self):
        """Flattening a loader over 3-tuple samples yields three tensors."""
        batches = DataLoader(TensorThreeTupleDataset(), batch_size=128)
        first, second, third = dl_utils.flatten(batches)

        # All three flattened items must be tensors.
        for flat in (first, second, third):
            assert torch.is_tensor(flat)

        square_shape = torch.Size([DATASET_SIZE, DATA_SIZE, DATA_SIZE])
        assert first.shape == square_shape
        assert first.shape == second.shape
        assert third.shape == torch.Size([DATASET_SIZE, DATA_SIZE, 1])
def implementKNN(classifier: KNNClassifier, dl_train, dl_test):
    """
    Evaluate a kNN classifier on a test set and return its accuracy.

    :param classifier: The classifier whose ``predict`` is called; presumably
        it has already been trained by the caller (this function never trains
        it).
    :param dl_train: Not used by this function.
    :param dl_test: Loader over the test samples and their labels.
    :return: The value of ``accuracy(y_test, y_pred)`` over the test set.
    """
    # Collect the whole test set so prediction happens in a single call.
    samples, labels = dataloader_utils.flatten(dl_test)

    predictions = classifier.predict(samples)
    return accuracy(labels, predictions)
    def train(self, dl_train: DataLoader):
        """
        Trains the KNN model. KNN training is memorizing the training data.
        Or, equivalently, the model parameters are the training data itself.

        :param dl_train: A DataLoader with labeled training sample (should
            return tuples).
        :return: self
        """
        # Gather everything the loader yields into a samples tensor and a
        # labels tensor; these are the "parameters" of the model.
        x_train, y_train = dataloader_utils.flatten(dl_train)
        self.x_train = x_train
        self.y_train = y_train

        # Number of distinct label values seen in the training data. Counting
        # on the tensor itself avoids the round trip through NumPy and a
        # Python set, and does not depend on the labels living on the CPU.
        self.n_classes = y_train.unique().numel()
        return self
# Average the per-batch accuracy of the linear classifier over the test set.
num_test_batches = len(dl_test)

mean_acc = 0
for x, y in dl_test:
    # predict() returns a pair; only the predicted labels are needed here.
    y_pred, _ = lin_cls.predict(x)
    mean_acc += lin_cls.evaluate_accuracy(y, y_pred)
mean_acc /= num_test_batches

print(f"Accuracy: {mean_acc:.1f}%")

import cs236605.dataloader_utils as dl_utils
from hw1.losses import SVMHingeLoss

# Build the hinge loss under test.
loss_fn = SVMHingeLoss(delta=1)

# Run the classifier on the entire test set at once (because it doesn't
# depend on initialization), then evaluate the loss on those predictions.
x, y = dl_utils.flatten(dl_test)
y_pred, x_scores = lin_cls.predict(x)
loss = loss_fn(x, y, x_scores, y_pred)

# Check the result against the pre-computed reference value.
expected_loss = 8.9579
loss_value = loss.item()
print("loss =", loss_value)
print('diff =', abs(loss_value - expected_loss))
test.assertAlmostEqual(loss_value, expected_loss, delta=1e-1)

from hw1.losses import SVMHingeLoss

# Instantiate the SVM hinge loss with delta=1; bound to loss_fn for the code
# that follows.
loss_fn = SVMHingeLoss(delta=1.)

# Compute loss and gradient
    # Runs the epoch loop for this classifier: one gradient step per training
    # batch, then an evaluation on the validation set.
    # NOTE(review): the signature below is cut off in this view -- the
    # parameter list is never closed, although the body reads learn_rate,
    # weight_decay and max_epochs. Several calls further down are truncated
    # the same way; restore them from the full file before relying on this.
    def train(self,
              dl_train: DataLoader,
              dl_valid: DataLoader,
              loss_fn: ClassifierLoss,

        # Per-epoch history containers; these two objects are what the method
        # returns.
        Result = namedtuple('Result', 'accuracy loss')
        train_res = Result(accuracy=[], loss=[])
        valid_res = Result(accuracy=[], loss=[])

        print('Training', end='')
        for epoch_idx in range(max_epochs):
            #for epoch_idx in range(3):

            # TODO: Implement model training loop.
            # At each epoch, evaluate the model on the entire training set
            # (batch by batch) and update the weights.
            # Each epoch, also evaluate on the validation set.
            # Accumulate average loss and total accuracy for both sets.
            # The train/valid_res variables should hold the average loss and
            # accuracy per epoch.
            # Don't forget to add a regularization term to the loss, using the
            # weight_decay parameter.

            # Running sums over the batches of this epoch; divided by the
            # batch count at the end of the epoch.
            total_correct = 0
            average_loss = 0

            # ====== YOUR CODE: ======

            # Function-scope import, re-executed on every epoch.
            import cs236605.dataloader_utils as dataloader_utils

            # Iterate through the training batches and do a GD step for each.
            num_of_batches = 0
            for (x_train, y_train) in dl_train:
                num_of_batches += 1

                # Calc batch loss and accuracy and accumulate them.
                y_predicted, x_scores = self.predict(x_train)
                batch_accuracy = self.evaluate_accuracy(y_train, y_predicted)
                # NOTE(review): the call below is truncated in this view (its
                # closing arguments are missing).
                batch_loss = loss_fn.loss(x_train, y_train, x_scores,
                average_loss += batch_loss
                total_correct += batch_accuracy

                # Calc the grad of the loss, add regularization factor, GD step.
                # NOTE(review): the second line adds weight_decay * loss_grad,
                # i.e. it rescales the loss gradient; self.weights does not
                # enter the regularization term. Confirm this is intended.
                loss_grad = loss_fn.grad()
                loss_grad += torch.mul(loss_grad, weight_decay)
                grad_step = torch.mul(loss_grad, learn_rate)
                self.weights = self.weights - grad_step

            # Calculate accuracy and loss on validation set
            x_valid, y_valid = dataloader_utils.flatten(dl_valid)
            y_predicted_valid, x_scores_valid = self.predict(x_valid)
            accuracy_valid = self.evaluate_accuracy(y_valid, y_predicted_valid)
            # NOTE(review): truncated call, same as the training loss above.
            valid_loss = loss_fn.loss(x_valid, y_valid, x_scores_valid,

            # Calc avg loss and acc across all train batches.
            # Append train/valid loss and acc to lists.
            # NOTE(review): no append to train_res / valid_res is visible in
            # these lines, so as shown the returned histories stay empty.
            average_loss = average_loss / num_of_batches
            total_correct = total_correct / num_of_batches
            # ========================
            print('.', end='')

        return train_res, valid_res
    # Runs the epoch loop for this classifier: one gradient step per training
    # batch with an L2 term on the weights, then an evaluation on the
    # validation set.
    # NOTE(review): the signature below is cut off in this view -- the
    # parameter list is never closed, although the body reads learn_rate,
    # weight_decay and max_epochs. Several expressions further down are
    # truncated the same way; restore them from the full file before relying
    # on this.
    def train(self,
              dl_train: DataLoader,
              dl_valid: DataLoader,
              loss_fn: ClassifierLoss,

        # Per-epoch history containers; these two objects are what the method
        # returns.
        Result = namedtuple('Result', 'accuracy loss')
        train_res = Result(accuracy=[], loss=[])
        valid_res = Result(accuracy=[], loss=[])

        print('Training', end='')
        for epoch_idx in range(max_epochs):

            # TODO: Implement model training loop.
            # At each epoch, evaluate the model on the entire training set
            # (batch by batch) and update the weights.
            # Each epoch, also evaluate on the validation set.
            # Accumulate average loss and total accuracy for both sets.
            # The train/valid_res variables should hold the average loss and
            # accuracy per epoch.
            # Don't forget to add a regularization term to the loss, using the
            # weight_decay parameter.

            # NOTE(review): these two names are not read again in the lines
            # visible here; the accumulators actually used are defined below.
            total_correct = 0
            average_loss = 0

            # ====== YOUR CODE: ======

            # Sample-weighted running sums for this epoch.
            train_loss = 0
            train_accuracy = 0
            n_samples_total = 0

            for idx, (x_train, y_train) in enumerate(dl_train):
                y_train_pred, train_class_scores = self.predict(x_train)

                # NOTE(review): the next expression is truncated in this view.
                w_norm = torch.sum(torch.mul(self.weights,
                # Batch loss as a Python number plus the weight_decay / 2
                # penalty on w_norm.
                train_loss_batch = loss_fn(
                    x_train, y_train, train_class_scores,
                    y_train_pred).item() + weight_decay / 2.0 * w_norm
                train_accuracy_batch = self.evaluate_accuracy(
                    y_train, y_train_pred)

                # Weight each batch by its sample count so that a smaller
                # batch does not skew the epoch averages.
                n_samples_total += x_train.shape[0]
                train_loss += train_loss_batch * float(x_train.shape[0])
                # NOTE(review): truncated expression (closing part missing).
                train_accuracy += train_accuracy_batch * float(

                # Gradient of the regularized loss, then an in-place gradient
                # descent step on the weights.
                grad = loss_fn.grad() + weight_decay * self.weights
                self.weights -= learn_rate * grad

            # Turn the weighted sums into per-sample averages for the epoch.
            train_loss /= n_samples_total
            train_accuracy /= n_samples_total

            print('Epoch', epoch_idx, 'training loss', train_loss,
                  'training accuracy', train_accuracy)

            # Evaluate on the whole validation set in one pass.
            x_valid, y_valid = dl_utils.flatten(dl_valid)
            y_valid_pred, valid_class_scores = self.predict(x_valid)

            # NOTE(review): truncated call (closing arguments missing).
            valid_loss = loss_fn(x_valid, y_valid, valid_class_scores,
            valid_accuracy = self.evaluate_accuracy(y_valid, y_valid_pred)

            # NOTE(review): no append to train_res / valid_res is visible in
            # these lines, so as shown the returned histories stay empty.

            # ========================
            print('.', end='')

        return train_res, valid_res