Example #1
0
def main():
    """Smoke-test BatchGenerator on the pets training subset: batch counts,
    shapes, dtypes, and shuffled vs. unshuffled first samples."""
    data = PetsDataset("/home/helmuth/dlvc/cifar-10-batches-py", Subset.TRAINING)
    # preprocessing chain: flatten, cast to float32, scale pixels to [-1, 1]
    op = ops.chain([
        ops.vectorize(),
        ops.type_cast(np.float32),
        ops.add(-127.5),
        ops.mul(1/127.5),
    ])
    # batch size == dataset size -> exactly one batch
    whole_set_gen = BatchGenerator(data, len(data), False)
    assert len(whole_set_gen) == 1
    # 500-sample batches -> 16 batches expected
    small_batch_gen = BatchGenerator(data, 500, False, op)
    assert len(small_batch_gen) == 16
    # every batch except the last must be full-sized; all must be float32
    # data with integer labels
    for batch_no, batch in enumerate(small_batch_gen, start=1):
        if batch_no < 16:
            assert batch.data.shape == (500, 3072)
            assert batch.labels.shape == (500,)
        assert batch.data.dtype == np.float32
        assert np.issubdtype(batch.labels.dtype, np.integer)
        if batch_no == 1:
            print("First batch, first sample, not shuffled")
            print(batch.data[0])
    # shuffled generator: the first sample should vary between iterations
    shuffled_gen = BatchGenerator(data, 500, True, op)
    for _ in range(5):
        print("First batch, first sample, shuffled")
        print(next(iter(shuffled_gen)).data[0])
 def test_correctness_of_data_for_train(self):
     """The training subset splits into the expected number of batches and
     the first sample's leading pixel values match the known reference."""
     op = ops.chain([
         ops.vectorize(),
         ops.type_cast(np.float32)
     ])
     dataset = PetsDataset(os.path.join(os.getcwd(), self._data_dir), Subset.TRAINING)
     # batch size == dataset size (7959 training samples) -> one batch
     one_batch_gen = BatchGenerator(dataset, 7959, False, op)
     self.assertEqual(len(one_batch_gen), 1)
     many_batch_gen = BatchGenerator(dataset, 500, False, op)
     self.assertEqual(len(many_batch_gen), 16)
     # first five values of the first (unshuffled) sample
     reference = [116., 125., 125., 91., 101.]
     first_batch = next(iter(many_batch_gen))
     # plain loop instead of a list comprehension used only for its
     # side effects (the original built and discarded a list of Nones)
     for expected, actual in zip(reference, first_batch.data[0][:5]):
         self.assertEqual(actual, expected)
def load_dataset_into_batches(file_dir_path: str, subset: Subset, subset_size: int, shuffle: bool = False):
    """Load the pets subset at *file_dir_path* and wrap it in a BatchGenerator.

    The samples are vectorized and cast to float32 before batching.
    """
    preprocessing = ops.chain([
        ops.vectorize(),
        ops.type_cast(np.float32)
    ])
    dataset = PetsDataset(file_dir_path, subset)
    return BatchGenerator(dataset, subset_size, shuffle, preprocessing)
Example #4
0
 def test_shuffle(self):
     """With shuffling enabled the batches must not come back in the
     deterministic order the unshuffled generator produces."""
     data_root = os.path.join(os.getcwd(), self._data_dir)
     dataset = PetsDataset(data_root, Subset.TRAINING)
     batch_set = BatchGenerator(dataset, 100, True)
     self.assertEqual(len(batch_set), 80)
     batches = iter(batch_set)
     # 9 and 607 are the first indices the unshuffled generator yields,
     # so a shuffled run should (almost surely) differ from them
     self.assertFalse(next(batches).idx[0] == 9)
     self.assertFalse(next(batches).idx[0] == 607)
Example #5
0
 def test_create_batch(self):
     """Without shuffling, batches preserve the dataset's sample order."""
     dataset = PetsDataset(os.path.join(os.getcwd(), self._data_dir),
                           Subset.TRAINING)
     generator = BatchGenerator(dataset, 100, False)
     self.assertEqual(len(generator), 80)
     # first index of each of the first two batches, in order
     batches = iter(generator)
     for expected_idx in (9, 607):
         self.assertEqual(next(batches).idx[0], expected_idx)
Example #6
0
 def test_data_transformation(self):
     """vectorize + type_cast yields flat float32 vectors of length 3072."""
     op = ops.chain([ops.vectorize(), ops.type_cast(np.float32)])
     dataset = PetsDataset(os.path.join(os.getcwd(), self._data_dir),
                           Subset.TRAINING)
     generator = BatchGenerator(dataset, 100, False, op)
     self.assertEqual(len(generator), 80)
     first_batch = next(iter(generator))
     # 32 * 32 * 3 = 3072 values per flattened image
     self.assertEqual(first_batch.data[0].shape, (3072, ))
     self.assertTrue(np.issubdtype(first_batch.data.dtype, np.float32))
 def test_train_with_proper_data(self):
     """Training on one full, correctly shaped batch must not raise."""
     op = ops.chain([
         ops.vectorize(),
         ops.type_cast(np.float32)
     ])
     dataset = PetsDataset(os.path.join(os.getcwd(), self._data_dir), Subset.TRAINING)
     # batch size == dataset size -> the single batch holds every sample
     whole_set = next(iter(BatchGenerator(dataset, 7959, False, op)))
     classifier = KnnClassifier(10, 3072, 2)
     classifier.train(whole_set.data, whole_set.label)
 def test_train_with_wrong_type_of_labels(self):
     """Passing a plain Python list as labels must raise a TypeError."""
     op = ops.chain([
         ops.vectorize(),
         ops.type_cast(np.float32)
     ])
     dataset = PetsDataset(os.path.join(os.getcwd(), self._data_dir), Subset.TRAINING)
     batch = next(iter(BatchGenerator(dataset, 7959, False, op)))
     classifier = KnnClassifier(10, 3072, 2)
     with self.assertRaises(TypeError):
         classifier.train(batch.data, [0, 1, 0])
 def test_train_wrong_vector_size_in_data(self):
     """Feature vectors shorter than the declared input size must raise."""
     op = ops.chain([
         ops.vectorize(),
         ops.type_cast(np.float32)
     ])
     dataset = PetsDataset(os.path.join(os.getcwd(), self._data_dir), Subset.TRAINING)
     batch = next(iter(BatchGenerator(dataset, 7959, False, op)))
     classifier = KnnClassifier(10, 3072, 2)
     # drop one column -> vectors of length 3071 instead of 3072
     truncated = np.delete(batch.data, 100, 1)
     with self.assertRaises(RuntimeError):
         classifier.train(truncated, batch.label)
    def test_predict_with_proper_data(self):
        """Predicting on the validation set returns one score vector per
        sample, each summing to 1 (a probability distribution)."""
        op = ops.chain([
            ops.vectorize(),
            ops.type_cast(np.float32)
        ])
        dataset_training = PetsDataset(os.path.join(os.getcwd(), self._data_dir), Subset.TRAINING)
        dataset_valid = PetsDataset(os.path.join(os.getcwd(), self._data_dir), Subset.VALIDATION)

        # one full-size batch per subset
        batch_gen_t = BatchGenerator(dataset_training, 795, False, op)
        batch_gen_v = BatchGenerator(dataset_valid, 204, False, op)

        iter_result_t = next(iter(batch_gen_t))
        iter_result_v = next(iter(batch_gen_v))

        classifier = KnnClassifier(10, 3072, 2)
        classifier.train(iter_result_t.data, iter_result_t.label)
        results = classifier.predict(iter_result_v.data)
        self.assertEqual(len(results), 204)
        for result in results:
            # the scores are floats: exact == comparison (assertEqual) is
            # brittle; compare with a tolerance instead
            self.assertAlmostEqual(np.sum(result), 1.0)
Example #11
0
    assert str(dataset_training[index].label) == label, "Label of index " + str(index) + " is not correct: it is " + \
        str(dataset_training[index].label) + ", expected: " + str(label) + "."

# Make sure that the color channels are in BGR order (not RGB) by displaying the images and verifying the colors are correct (cv2.imshow, cv2.imwrite)
# -> you should see a dog on a blue blanket here
sample = dataset_training[1337]
# cv2.imwrite expects BGR channel order, so a correct-looking image confirms it
cv2.imwrite('dog_on_a_blue_blanket.jpg', sample.data)
#cv2.imshow("image", sample.data)
#cv2.waitKey()

##########################################
#                 PART 2                 #
##########################################

# The number of training batches is 1 if the batch size is set to the number of samples in the dataset
batch_generator = BatchGenerator(dataset_training, len(dataset_training),
                                 False)
num_of_batches = len(batch_generator)
expected = 1
assert num_of_batches == expected, "Number of batches is " + str(
    num_of_batches) + ", expected: " + str(expected)

# The number of training batches is 16 if the batch size is set to 500
# (presumably ceil(n_samples / 500) = 16 for the training subset — confirm)
batch_generator = BatchGenerator(dataset_training, 500, False)
num_of_batches = len(batch_generator)
expected = 16
assert num_of_batches == expected, "Number of batches is " + str(
    num_of_batches) + ", expected: " + str(expected)
batch_generator = BatchGenerator(dataset_training,
                                 500,
Example #12
0
DATA_PATH = "../cifar-10-batches-py/"
MODEL_PATH = "best_model.pt"
train_data = PetsDataset(DATA_PATH, Subset.TRAINING)
val_data = PetsDataset(DATA_PATH, Subset.VALIDATION)

# preprocessing + augmentation chain: scale pixels to [-1, 1], random
# horizontal flip, random crop with constant padding, additive noise,
# then HWC -> CHW layout for PyTorch
op = ops.chain([
    ops.type_cast(np.float32),
    ops.add(-127.5),
    ops.mul(1 / 127.5),
    ops.hflip(),
    ops.rcrop(32, 4, 'constant'),
    ops.add_noise(),
    ops.hwc2chw()
])

# NOTE(review): the validation generator applies the same random
# augmentations (hflip/rcrop/noise) as training — confirm this is intended
train_batches = BatchGenerator(train_data, 128, False, op)
val_batches = BatchGenerator(val_data, 128, False, op)


class Net(nn.Module):
    def __init__(self, img_size, num_classes):
        super(Net, self).__init__()
        self.img_size = img_size

        # Instantiate the ReLU nonlinearity
        self.relu = nn.ReLU()

        # Instantiate two convolutional blocks
        ## Block 1
        self.conv1_1 = nn.Conv2d(
            in_channels=3, out_channels=32, kernel_size=3, padding=1)
Example #13
0
from dlvc.test import Accuracy
import numpy as np

# NOTE(review): `dir` shadows the builtin of the same name — consider renaming
dir = '/Users/mmatak/dev/college/DLVC/cifar-10/cifar-10-batches-py/'

# CIFAR-10 image geometry
IMAGE_HEIGHT = 32
IMAGE_WIDTH = 32
NUM_CHANNELS = 3

# binary cat-vs-dog task
NUM_CLASSES = 2

pets_training = PetsDataset(dir, Subset.TRAINING)
pets_validation = PetsDataset(dir, Subset.VALIDATION)
pets_test = PetsDataset(dir, Subset.TEST)

# whole-subset batches (batch size == subset length -> one batch each),
# cast to float32 and flattened for the kNN classifier
batchGenerator_training = BatchGenerator(pets_training, len(pets_training), False,
                                         op=chain([type_cast(dtype=np.float32), vectorize()]))
batchGenerator_validation = BatchGenerator(pets_validation, len(pets_validation), False,
                                         op=chain([type_cast(dtype=np.float32), vectorize()]))
batchGenerator_test = BatchGenerator(pets_test, len(pets_test), False,
                                         op=chain([type_cast(dtype=np.float32), vectorize()]))

# trackers for the best k found during the grid search below
best_accuracy = Accuracy()
best_k = -1
results = {}
knn = None
for k in range(1, 100, 40):  # grid search example
    knn = KnnClassifier(k, IMAGE_HEIGHT*IMAGE_WIDTH*NUM_CHANNELS, NUM_CLASSES)
    accuracy = Accuracy()

    # train and compute validation accuracy ...
Example #14
0
BATCH_SIZE = 128
NUM_CLASSES = 2
EPOCHS = 500
lr = 0.001
# weight decay 0 in this configuration, in part 3 this is changed
wd = 0.0

# NOTE(review): `dir` is the module-level path variable defined earlier;
# it shadows the builtin — consider renaming
pets_training = PetsDataset(dir, Subset.TRAINING)
pets_validation = PetsDataset(dir, Subset.VALIDATION)
pets_test = PetsDataset(dir, Subset.TEST)


# Shared preprocessing: cast to float32, scale pixels to [-1, 1], then
# HWC -> CHW layout for PyTorch. All four ops are deterministic and
# stateless, so one chain can safely be shared by all three generators
# instead of being rebuilt three times.
normalize_op = chain([type_cast(dtype=np.float32),
                      add(-127.5),
                      mul(1 / 127.5),
                      hwc2chw()])

# only the training generator shuffles
batchGenerator_training = BatchGenerator(pets_training, BATCH_SIZE, shuffle=True,
                                         op=normalize_op)
batchGenerator_validation = BatchGenerator(pets_validation, BATCH_SIZE, shuffle=False,
                                           op=normalize_op)
batchGenerator_test = BatchGenerator(pets_test, BATCH_SIZE, shuffle=False,
                                     op=normalize_op)


class CatDogNet(nn.Module):
Example #15
0
# Step 1: load the data sets (TRAIN, VALIDATION)
train_data = PetsDataset("../cifar-10-batches-py", Subset.TRAINING)
val_data = PetsDataset("../cifar-10-batches-py", Subset.VALIDATION)

# Operations to standardize
# scale to sample mean=0, sd=1
# calculate average training sample mean & sd
op_calc = ops.chain([
    ops.type_cast(np.float32),
    ops.mean_sd()
])
# using batch generator (could do it directly but I'm lazy)
train_full_batch_gen = BatchGenerator(
    train_data,
    len(train_data),
    False,
    op_calc)
# single batch covering the whole training set
train_full_batch = next(b for b in train_full_batch_gen)
# presumably ops.mean_sd() maps each sample to its (mean, sd) pair, so the
# column means give the dataset-average mean and sd — TODO confirm
train_mean_sd = np.mean(train_full_batch.data, axis=0)
# create operation to scale
op2 = ops.chain([
    ops.type_cast(np.float32),
    ops.scale(train_mean_sd[0], train_mean_sd[1]),
    ops.hwc2chw()
])
# include augmentation: crop only
op2_augmented = ops.chain([
    ops.type_cast(np.float32),
    ops.scale(train_mean_sd[0], train_mean_sd[1]),
    ops.rcrop(32, 5, 'mean'),
Example #16
0
# Step 1: load the data sets (TRAIN, VALIDATION & TEST)
train_data = PetsDataset("../cifar-10-batches-py", Subset.TRAINING)
val_data = PetsDataset("../cifar-10-batches-py", Subset.VALIDATION)
test_data = PetsDataset("../cifar-10-batches-py", Subset.TEST)

# Operations to standardize
# flatten, cast to float32, then scale pixel values to [-1, 1]
op = ops.chain([
    ops.vectorize(),
    ops.type_cast(np.float32),
    ops.add(-127.5),
    ops.mul(1/127.5),
])
# Step 2: Create batch generator for each
BATCH_SIZE = 512
# NOTE(review): the validation and test generators also shuffle — harmless
# for accuracy computation but usually unnecessary; confirm it is intended
train_batches = BatchGenerator(train_data, BATCH_SIZE, True, op)
val_batches = BatchGenerator(val_data, BATCH_SIZE, True, op)
test_batches = BatchGenerator(test_data, BATCH_SIZE, True, op)

def train_model(lr: float, momentum: float) -> TrainedModel:
    '''
    Trains a linear classifier with a given learning rate (lr) and momentum.
    Computes the accuracy on the validation set.
    Returns both the trained classifier and accuracy.
    '''

    # Step 3: train linear classifier, 10 epochs
    clf = LinearClassifier(3072, train_data.num_classes(), lr, momentum, True)

    n_epochs = 10
    for i in range(n_epochs):
Example #17
0
def train(lr, wd, operation):
    """Train a Net with the given hyper-parameters, early-stopping on
    validation accuracy, and append a run summary to RESULTS_FILE.

    Args:
        lr: learning rate passed to CnnClassifier.
        wd: weight decay passed to CnnClassifier.
        operation: key into the module-level `operations` dict selecting
            the preprocessing/augmentation chain.

    Relies on module-level globals: CUDA, train_data, val_data, operations,
    NR_EPOCHS, EARLY_STOPPING, RESULTS_FILE, Net, CnnClassifier.
    """
    print("Training a network with:")
    print("Weight Decay = {}".format(wd))
    print("Augmentation = {}".format(operation))
    print("Learning Rate = {}".format(lr))

    device = torch.device("cuda" if CUDA else "cpu")

    img_shape = train_data.image_shape()
    num_classes = train_data.num_classes()

    net = Net(img_shape, num_classes).to(device)
    # batch dimension reported as 0 (variable) to the classifier
    clf = CnnClassifier(net, (0, *img_shape), num_classes, lr, wd)

    op = operations[operation]
    train_batches = BatchGenerator(train_data, 128, False, op)
    val_batches = BatchGenerator(val_data, 128, False, op)

    not_improved_since = 0  # epochs since the last accuracy improvement
    best_accuracy = 0
    best_loss = 0  # mean train loss at the best-accuracy epoch
    stop_epoch = 0

    for epoch in range(NR_EPOCHS):
        print("Epoch {}/{}".format(epoch, NR_EPOCHS), end="\r")
        # one pass over the training data, collecting per-batch losses
        losses = []
        for batch in train_batches:
            loss = clf.train(batch.data, batch.label)
            losses.append(loss)
        losses = np.array(losses)
        mean = round(np.mean(losses), 3)
        std = round(np.std(losses), 3)  # computed but not reported below

        # evaluate on the validation set
        accuracy = Accuracy()
        for batch in val_batches:
            predictions = clf.predict(batch.data)
            accuracy.update(predictions, batch.label)
        acc = round(accuracy.accuracy(), 3)
        # Early stopping
        if acc > best_accuracy:
            stop_epoch = epoch
            not_improved_since = 0
            best_accuracy = acc
            best_loss = mean
        else:
            not_improved_since += 1
        if not_improved_since > EARLY_STOPPING: # if not improved since 5 epochs stop training
            break
    print()
    print("Best val accuracy after epoch {}".format(stop_epoch + 1))
    print("Validation Accuracy: {}".format(best_accuracy))
    print("Train Loss: {}".format(best_loss))

    # append this run's summary to the shared results file
    with open(RESULTS_FILE, "a") as file:
        file.write("Trained a network with:\n")
        file.write("Weight Decay = {}\n".format(wd))
        file.write("Augmentation = {}\n".format(operation))
        file.write("Learning Rate = {}\n".format(lr))
        file.write("---\n")
        file.write("Best val accuracy after epoch {}\n".format(stop_epoch + 1))
        file.write("Validation Accuracy: {}\n".format(best_accuracy))
        file.write("Train Loss: {}\n".format(best_loss))
        file.write("\n#################################\n")
Example #18
0
# make sure the whole pipeline works:
#  when k=1 and
#  training and predict subset are equal and
#  kNN must have accuracy 100%

start = time.time()

pets = PetsDataset(
    '/Users/mmatak/dev/college/DLVC/cifar-10/cifar-10-batches-py/',
    Subset.TEST)
num_classes = 2
k = 1
# 32x32 RGB images flattened -> 3072-dimensional feature vectors
knn = KnnClassifier(k, 32 * 32 * 3, num_classes)
batchGenerator = BatchGenerator(pets,
                                512,
                                False,
                                op=chain(
                                    [type_cast(dtype=np.float32),
                                     vectorize()]))

groundTruthLabels = None
# NOTE(review): each iteration calls train() again and overwrites
# groundTruthLabels, so only the last batch's labels survive the loop —
# confirm train() accumulates samples rather than replacing them
for batch in batchGenerator:
    knn.train(batch.data, batch.label)
    groundTruthLabels = batch.label

predictedLabels = None


def measure_accuracy(predictedLabels: np.ndarray,
                     groundTruthLabels: np.ndarray):
    correct = 0
    for index, trueLabel in enumerate(groundTruthLabels):
Example #19
0
# TODO implement steps 1-2

data_path = ""  # something ending with "...\\cifar-10-batches-py" (the extracted CIFAR-10 folder)
trainingDataset = PetsDataset(data_path, Subset.TRAINING)
validationDataset = PetsDataset(data_path, Subset.VALIDATION)
testDataset = PetsDataset(data_path, Subset.TEST)

# flatten, cast to float32, scale pixel values to [-1, 1]
op = chain([
    vectorize(),
    type_cast(np.float32),
    add(-127.5),
    mul(1 / 127.5),
])

# 32-sample shuffled batches for all three subsets
bg_training = BatchGenerator(dataset=trainingDataset,
                             num=32,
                             shuffle=True,
                             op=op)
bg_validation = BatchGenerator(dataset=validationDataset,
                               num=32,
                               shuffle=True,
                               op=op)
bg_test = BatchGenerator(dataset=testDataset, num=32, shuffle=True, op=op)


def random_search(lr_max=1, lr_min=0.9, momentum_max=1, momentum_min=0.9):
    """Draw a (learning rate, momentum) pair uniformly from the given ranges.

    The learning rate is sampled first, then the momentum, one
    ``np.random.random_sample()`` draw each.
    """
    lr_span = lr_max - lr_min
    momentum_span = momentum_max - momentum_min
    sampled_lr = lr_span * np.random.random_sample() + lr_min
    sampled_momentum = momentum_span * np.random.random_sample() + momentum_min
    return (sampled_lr, sampled_momentum)

Example #20
0
# accuracy trackers for the random/validation/train evaluations
random_accuracy = Accuracy()
validation_accuracy = Accuracy()
train_accuracy = Accuracy()

print('Number of Classes = {}'.format(pets_train.num_classes()))
print('Number of Images = {}'.format(pets_train.__len__()))
print('First 10 Classes >>> {}'.format(pets_train.labels[:10]))

# flatten, cast to float32, scale pixel values to [-1, 1]
op = ops.chain([
    ops.vectorize(),
    ops.type_cast(np.float32),
    ops.add(-127.5),
    ops.mul(1 / 127.5),
])

train_batches = BatchGenerator(pets_train, 100, False, op)
validation_batches = BatchGenerator(pets_val, 100, False, op)

print('Number of Batches = {}'.format(train_batches.__len__()))

# linear softmax classifier on the 3072-dim flattened images
model = LinearClassifier(3072,
                         pets_train.num_classes(),
                         lr=0.001,
                         momentum=0.1,
                         nesterov=True)
# test batch generator
for batch in train_batches:
    print('Shape of the data batch: {}'.format(batch.data.shape))
    print('Shape of the label batch: {}'.format(batch.label.shape))
    print('First 5 Elements of the first element: {}'.format(
Example #21
0
# (classifier, validation accuracy) pair returned by the training helper
TrainedModel = namedtuple('TrainedModel', ['model', 'accuracy'])


train_data = PetsDataset("../cifar-10-batches-py/", Subset.TRAINING)
val_data = PetsDataset("../cifar-10-batches-py/", Subset.VALIDATION)
test_data = PetsDataset('../cifar-10-batches-py/', Subset.TEST)


# flatten, cast to float32, scale pixel values to [-1, 1]
op = ops.chain([
    ops.vectorize(),
    ops.type_cast(np.float32),
    ops.add(-127.5),
    ops.mul(1/127.5),
])

# 50-sample batches, no shuffling
train_batches = BatchGenerator(train_data, 50, False, op)
val_batches = BatchGenerator(val_data, 50, False, op)
test_batches = BatchGenerator(test_data, 50, False, op)

def train_model(lr: float, momentum: float) -> TrainedModel:
    '''
    Trains a linear classifier with a given learning rate (lr) and momentum.
    Computes the accuracy on the validation set.
    Returns both the trained classifier and accuracy.
    '''

    clf = LinearClassifier(input_dim=3072,  num_classes=train_data.num_classes(), lr=lr, momentum=momentum, nesterov=False)

    n_epochs = 10
    for i in range(n_epochs):
        for batch in train_batches:            
Example #22
0
        rcrop(25, 2, 'median'),
        resize(input_size, pad_mode_for_resizing),
        hwc2chw()
    ])
else:
    net = CatDogNet()
    op_chain = chain([
        type_cast(dtype=np.float32),
        add(-127.5),
        mul(1 / 127.5),
        rcrop(25, 2, 'median'),
        hwc2chw()
    ])

# only the training generator shuffles; both share the op chain built above
batchGenerator_training = BatchGenerator(pets_training,
                                         BATCH_SIZE,
                                         shuffle=True,
                                         op=op_chain)
batchGenerator_validation = BatchGenerator(pets_validation,
                                           BATCH_SIZE,
                                           shuffle=False,
                                           op=op_chain)

# classifier wraps the net; input shape is (batch, C, H, W)
clf = CnnClassifier(net, (BATCH_SIZE, NUM_CHANNELS, IMAGE_HEIGHT, IMAGE_WIDTH),
                    NUM_CLASSES, lr, wd)
loss_list = []
best_accuracy = 0.0
accuracy = Accuracy()
epochs_since_best_accuracy = 0  # early-stopping counter
for epoch in range(0, EPOCHS):
    print("Epoche: ", epoch + 1)
Example #23
0
    ops.add(-127.5),
    ops.mul(1 / 127.5),
    ops.hflip(),
    ops.rcrop(32, 4, 'constant'),
    ops.add_noise(),
    ops.hwc2chw()
])

# Inverse of the normalization above: CHW -> HWC, then undo the [-1, 1]
# scaling back to uint8 pixel values (e.g. for visualization)
reverse_op = ops.chain([
    ops.chw2hwc(),
    ops.mul(127.5),
    ops.add(127.5),
    ops.type_cast(np.uint8),
])

train_batches = BatchGenerator(pets_train, 100, False, op)


class Net(nn.Module):
    """
    """
    def __init__(self, num_classes):
        super(Net, self).__init__()

        # Instantiate the ReLU nonlinearity
        self.relu = nn.ReLU()

        # Instantiate two convolutional layers
        self.conv1 = nn.Conv2d(in_channels=3,
                               out_channels=5,
                               kernel_size=3,