def main():
    """Smoke-test PetsDataset + BatchGenerator on the training subset.

    Checks batch counts, batch shapes/dtypes for the fixed-size batches, and
    prints the first sample with and without shuffling.
    """
    data = PetsDataset("/home/helmuth/dlvc/cifar-10-batches-py", Subset.TRAINING)

    # Preprocessing chain: flatten, cast to float32, rescale pixels to [-1, 1].
    op = ops.chain([
        ops.vectorize(),
        ops.type_cast(np.float32),
        ops.add(-127.5),
        ops.mul(1/127.5),
    ])

    # A batch size equal to the dataset size yields exactly one batch.
    bg1 = BatchGenerator(data, len(data), False)
    assert len(bg1) == 1

    # With batches of 500 samples the training set splits into 16 batches.
    bg2 = BatchGenerator(data, 500, False, op)
    assert len(bg2) == 16

    for batch_no, batch in enumerate(bg2, start=1):
        # All batches except the last one are full-sized.
        if batch_no < 16:
            assert batch.data.shape == (500, 3072)
            assert batch.labels.shape == (500,)
            assert batch.data.dtype == np.float32
            assert np.issubdtype(batch.labels.dtype, np.integer)
        if batch_no == 1:
            print("First batch, first sample, not shuffled")
            print(batch.data[0])

    # Shuffled generator: the first sample should vary between iterations.
    bg3 = BatchGenerator(data, 500, True, op)
    for _ in range(5):
        shuffled_iter = iter(bg3)
        print("First batch, first sample, shuffled")
        print(next(shuffled_iter).data[0])
def test_correctness_of_data_for_train(self):
    """Verify batch counts and the first pixel values of the first training batch.

    Fix: the original ran `self.assertEqual` inside a list comprehension built
    only for its side effects (the resulting list was discarded) and rebound
    the iterator name `batch_iter` to the batch itself; replaced with a plain
    loop over `zip` and a clearer variable name.
    """
    op = ops.chain([
        ops.vectorize(),
        ops.type_cast(np.float32),
    ])
    dataset = PetsDataset(os.path.join(os.getcwd(), self._data_dir), Subset.TRAINING)

    # Batch size equal to the number of training samples -> a single batch.
    one_batch_gen = BatchGenerator(dataset, 7959, False, op)
    self.assertEqual(len(one_batch_gen), 1)

    # 7959 samples in batches of 500 -> 16 batches (the last one is smaller).
    many_batch_gen = BatchGenerator(dataset, 500, False, op)
    self.assertEqual(len(many_batch_gen), 16)

    # Expected first five values of the first (unshuffled) sample's data vector.
    reference = [116., 125., 125., 91., 101.]
    first_batch = next(iter(many_batch_gen))
    for actual, expected in zip(first_batch.data[0][:5], reference):
        self.assertEqual(actual, expected)
def load_dataset_into_batches(file_dir_path: str, subset: Subset, subset_size: int, shuffle: bool = False):
    """Load the pets dataset at *file_dir_path* and wrap it in a BatchGenerator.

    Samples are vectorized and cast to float32; *subset_size* is the batch
    size and *shuffle* controls sample-order randomization.
    """
    preprocessing = ops.chain([
        ops.vectorize(),
        ops.type_cast(np.float32),
    ])
    return BatchGenerator(PetsDataset(file_dir_path, subset), subset_size, shuffle, preprocessing)
def test_shuffle(self):
    """Shuffled batches should not start with the known unshuffled indices.

    NOTE(review): this check is probabilistic — a fair shuffle could
    legitimately place index 9 (or 607) first, so the test can fail by chance.
    """
    dataset = PetsDataset(os.path.join(os.getcwd(), self._data_dir), Subset.TRAINING)
    batches = BatchGenerator(dataset, 100, True)
    self.assertEqual(len(batches), 80)

    batch_iter = iter(batches)
    # 9 / 607 are the first indices of the first two batches when NOT shuffled.
    self.assertFalse(next(batch_iter).idx[0] == 9)
    self.assertFalse(next(batch_iter).idx[0] == 607)
def test_create_batch(self):
    """Unshuffled batching preserves dataset order: known first index per batch."""
    dataset = PetsDataset(os.path.join(os.getcwd(), self._data_dir), Subset.TRAINING)
    batches = BatchGenerator(dataset, 100, False)
    self.assertEqual(len(batches), 80)

    batch_iter = iter(batches)
    # First sample index of the first two batches in dataset order.
    self.assertEqual(next(batch_iter).idx[0], 9)
    self.assertEqual(next(batch_iter).idx[0], 607)
def test_data_transformation(self):
    """vectorize + type_cast yields flat float32 vectors of length 3072 (32*32*3)."""
    transform = ops.chain([ops.vectorize(), ops.type_cast(np.float32)])
    dataset = PetsDataset(os.path.join(os.getcwd(), self._data_dir), Subset.TRAINING)
    batch_gen = BatchGenerator(dataset, 100, False, transform)
    self.assertEqual(len(batch_gen), 80)

    first_batch = next(iter(batch_gen))
    self.assertEqual(first_batch.data[0].shape, (3072, ))
    self.assertTrue(np.issubdtype(first_batch.data.dtype, np.float32))
def test_train_with_proper_data(self):
    """Training the kNN classifier on one full, well-formed batch should not raise."""
    transform = ops.chain([ops.vectorize(), ops.type_cast(np.float32)])
    dataset = PetsDataset(os.path.join(os.getcwd(), self._data_dir), Subset.TRAINING)
    # Single batch covering the whole training subset (7959 samples).
    full_batch = next(iter(BatchGenerator(dataset, 7959, False, transform)))

    classifier = KnnClassifier(10, 3072, 2)
    classifier.train(full_batch.data, full_batch.label)
def test_train_with_wrong_type_of_labels(self):
    """Passing a plain Python list as labels must raise TypeError."""
    transform = ops.chain([ops.vectorize(), ops.type_cast(np.float32)])
    dataset = PetsDataset(os.path.join(os.getcwd(), self._data_dir), Subset.TRAINING)
    full_batch = next(iter(BatchGenerator(dataset, 7959, False, transform)))

    classifier = KnnClassifier(10, 3072, 2)
    # Labels are a list instead of the expected array type.
    self.assertRaises(TypeError, classifier.train, full_batch.data, [0, 1, 0])
def test_train_wrong_vector_size_in_data(self):
    """Feature vectors with the wrong length must make train() raise RuntimeError."""
    transform = ops.chain([ops.vectorize(), ops.type_cast(np.float32)])
    dataset = PetsDataset(os.path.join(os.getcwd(), self._data_dir), Subset.TRAINING)
    full_batch = next(iter(BatchGenerator(dataset, 7959, False, transform)))

    classifier = KnnClassifier(10, 3072, 2)
    # Remove feature column 100 so every vector has length 3071 instead of 3072.
    truncated_data = np.delete(full_batch.data, 100, 1)
    self.assertRaises(RuntimeError, classifier.train, truncated_data, full_batch.label)
def test_predict_with_proper_data(self):
    """Predict on the validation subset: one score vector per sample, each summing to 1.

    Fix: the original used `assertEqual(np.sum(result), 1.0)` — an exact
    floating-point comparison on a sum of float32 scores, which is brittle.
    Replaced with `assertAlmostEqual` so tiny rounding error is tolerated.
    """
    op = ops.chain([
        ops.vectorize(),
        ops.type_cast(np.float32),
    ])
    dataset_training = PetsDataset(os.path.join(os.getcwd(), self._data_dir), Subset.TRAINING)
    dataset_valid = PetsDataset(os.path.join(os.getcwd(), self._data_dir), Subset.VALIDATION)

    # One batch per subset: 795 training samples, 204 validation samples.
    train_batch = next(iter(BatchGenerator(dataset_training, 795, False, op)))
    valid_batch = next(iter(BatchGenerator(dataset_valid, 204, False, op)))

    classifier = KnnClassifier(10, 3072, 2)
    classifier.train(train_batch.data, train_batch.label)

    results = classifier.predict(valid_batch.data)
    self.assertEqual(len(results), 204)
    for result in results:
        # Each prediction is a (presumably normalized) class-score vector.
        self.assertAlmostEqual(np.sum(result), 1.0, places=5)
assert str(dataset_training[index].label) == label, "Label of index " + str(index) + " is not correct: it is " + \ str(dataset_training[index].label) + ", expected: " + str(label) + "." # Make sure that the color channels are in BGR order (not RGB) by displaying the images and verifying the colors are correct (cv2.imshow, cv2.imwrite) # -> you should see a dog on a blue blanket here sample = dataset_training[1337] cv2.imwrite('dog_on_a_blue_blanket.jpg', sample.data) #cv2.imshow("image", sample.data) #cv2.waitKey() ########################################## # PART 2 # ########################################## # The number of training batches is 1 if the batch size is set to the number of samples in the dataset batch_generator = BatchGenerator(dataset_training, len(dataset_training), False) num_of_batches = len(batch_generator) expected = 1 assert num_of_batches == expected, "Number of batches is " + str( num_of_batches) + ", expected: " + str(expected) # The number of training batches is 16 if the batch size is set to 500 batch_generator = BatchGenerator(dataset_training, 500, False) num_of_batches = len(batch_generator) expected = 16 assert num_of_batches == expected, "Number of batches is " + str( num_of_batches) + ", expected: " + str(expected) # The data and label shapes are (500, 3072) and (500,), respectively, unless for the last batch batch_generator = BatchGenerator(dataset_training, 500,
# Paths to the CIFAR-10 pets data and the checkpoint file for the best model.
DATA_PATH = "../cifar-10-batches-py/"
MODEL_PATH = "best_model.pt"

train_data = PetsDataset(DATA_PATH, Subset.TRAINING)
val_data = PetsDataset(DATA_PATH, Subset.VALIDATION)

# Preprocessing + augmentation chain: cast to float32, rescale pixels to
# [-1, 1], random horizontal flip, random 32x32 crop with 4px constant
# padding, additive noise, then HWC -> CHW for PyTorch.
op = ops.chain([
    ops.type_cast(np.float32),
    ops.add(-127.5),
    ops.mul(1 / 127.5),
    ops.hflip(),
    ops.rcrop(32, 4, 'constant'),
    ops.add_noise(),
    ops.hwc2chw()
])

# NOTE(review): the same augmenting chain is applied to the validation
# batches and shuffle is False for training — confirm this is intended.
train_batches = BatchGenerator(train_data, 128, False, op)
val_batches = BatchGenerator(val_data, 128, False, op)


class Net(nn.Module):
    """Convolutional network for the pets classification task.

    NOTE(review): the class definition continues beyond this excerpt; only
    the beginning of __init__ is visible here.
    """

    def __init__(self, img_size, num_classes):
        super(Net, self).__init__()
        self.img_size = img_size
        # Instantiate the ReLU nonlinearity
        self.relu = nn.ReLU()
        # Instantiate two convolutional blocks
        ## Block 1
        self.conv1_1 = nn.Conv2d(
            in_channels=3, out_channels=32, kernel_size=3, padding=1)
from dlvc.test import Accuracy
import numpy as np

# NOTE(review): `dir` shadows the builtin of the same name — consider renaming.
dir = '/Users/mmatak/dev/college/DLVC/cifar-10/cifar-10-batches-py/'

# CIFAR-10 image geometry and the binary cat/dog task.
IMAGE_HEIGHT = 32
IMAGE_WIDTH = 32
NUM_CHANNELS = 3
NUM_CLASSES = 2

pets_training = PetsDataset(dir, Subset.TRAINING)
pets_validation = PetsDataset(dir, Subset.VALIDATION)
pets_test = PetsDataset(dir, Subset.TEST)

# One full-subset batch per split; samples are cast to float32 and flattened.
batchGenerator_training = BatchGenerator(
    pets_training, len(pets_training), False,
    op=chain([type_cast(dtype=np.float32), vectorize()]))
batchGenerator_validation = BatchGenerator(
    pets_validation, len(pets_validation), False,
    op=chain([type_cast(dtype=np.float32), vectorize()]))
batchGenerator_test = BatchGenerator(
    pets_test, len(pets_test), False,
    op=chain([type_cast(dtype=np.float32), vectorize()]))

# Track the best validation accuracy and the k that achieved it.
best_accuracy = Accuracy()
best_k = -1
results = {}
knn = None
for k in range(1, 100, 40):  # grid search example
    knn = KnnClassifier(k, IMAGE_HEIGHT*IMAGE_WIDTH*NUM_CHANNELS, NUM_CLASSES)
    accuracy = Accuracy()
    # train and compute validation accuracy ...
    # NOTE(review): the loop body continues beyond this excerpt.
# Training hyperparameters for the CNN experiments.
BATCH_SIZE = 128
NUM_CLASSES = 2
EPOCHS = 500
lr = 0.001
# weight decay 0 in this configuration, in part 3 this is changed
wd = 0.0

# NOTE(review): `dir` is assumed to be defined earlier in the file.
pets_training = PetsDataset(dir, Subset.TRAINING)
pets_validation = PetsDataset(dir, Subset.VALIDATION)
pets_test = PetsDataset(dir, Subset.TEST)

# Shared preprocessing: cast to float32, rescale pixels to [-1, 1], and
# convert HWC -> CHW for PyTorch. Only the training split is shuffled.
batchGenerator_training = BatchGenerator(
    pets_training, BATCH_SIZE, shuffle=True,
    op=chain([type_cast(dtype=np.float32), add(-127.5), mul(1 / 127.5), hwc2chw()]))
batchGenerator_validation = BatchGenerator(
    pets_validation, BATCH_SIZE, shuffle=False,
    op=chain([type_cast(dtype=np.float32), add(-127.5), mul(1 / 127.5), hwc2chw()]))
batchGenerator_test = BatchGenerator(
    pets_test, BATCH_SIZE, shuffle=False,
    op=chain([type_cast(dtype=np.float32), add(-127.5), mul(1 / 127.5), hwc2chw()]))


# NOTE(review): the class body continues beyond this excerpt.
class CatDogNet(nn.Module):
# Step 1: load the data sets (TRAIN, VALIDATION) train_data = PetsDataset("../cifar-10-batches-py", Subset.TRAINING) val_data = PetsDataset("../cifar-10-batches-py", Subset.VALIDATION) # Operations to standardize # scale to sample mean=0, sd=1 # calculate average training sample mean & sd op_calc = ops.chain([ ops.type_cast(np.float32), ops.mean_sd() ]) # using batch generator (could do it directly but I'm lazy) train_full_batch_gen = BatchGenerator( train_data, len(train_data), False, op_calc) train_full_batch = next(b for b in train_full_batch_gen) train_mean_sd = np.mean(train_full_batch.data, axis=0) # create operation to scale op2 = ops.chain([ ops.type_cast(np.float32), ops.scale(train_mean_sd[0], train_mean_sd[1]), ops.hwc2chw() ]) # include augmentation: crop only op2_augmented = ops.chain([ ops.type_cast(np.float32), ops.scale(train_mean_sd[0], train_mean_sd[1]), ops.rcrop(32, 5, 'mean'),
# Step 1: load the data sets (TRAIN, VALIDATION & TEST) train_data = PetsDataset("../cifar-10-batches-py", Subset.TRAINING) val_data = PetsDataset("../cifar-10-batches-py", Subset.VALIDATION) test_data = PetsDataset("../cifar-10-batches-py", Subset.TEST) # Operations to standardize op = ops.chain([ ops.vectorize(), ops.type_cast(np.float32), ops.add(-127.5), ops.mul(1/127.5), ]) # Step 2: Create batch generator for each BATCH_SIZE = 512 train_batches = BatchGenerator(train_data, BATCH_SIZE, True, op) val_batches = BatchGenerator(val_data, BATCH_SIZE, True, op) test_batches = BatchGenerator(test_data, BATCH_SIZE, True, op) def train_model(lr: float, momentum: float) -> TrainedModel: ''' Trains a linear classifier with a given learning rate (lr) and momentum. Computes the accuracy on the validation set. Returns both the trained classifier and accuracy. ''' # Step 3: train linear classifier, 10 epochs clf = LinearClassifier(3072, train_data.num_classes(), lr, momentum, True) n_epochs = 10 for i in range(n_epochs):
def train(lr, wd, operation):
    """Train a CNN with the given learning rate, weight decay, and augmentation.

    Trains for up to NR_EPOCHS epochs with early stopping once the validation
    accuracy has not improved for more than EARLY_STOPPING consecutive epochs,
    then prints and appends the best results to RESULTS_FILE.

    Args:
        lr: learning rate passed to CnnClassifier.
        wd: weight decay passed to CnnClassifier.
        operation: key into the module-level `operations` dict selecting the
            preprocessing/augmentation chain.
    """
    print("Training a network with:")
    print("Weight Decay = {}".format(wd))
    print("Augmentation = {}".format(operation))
    print("Learning Rate = {}".format(lr))
    device = torch.device("cuda" if CUDA else "cpu")
    img_shape = train_data.image_shape()
    num_classes = train_data.num_classes()
    net = Net(img_shape, num_classes).to(device)
    # Leading 0 in the input-shape tuple: batch dimension placeholder.
    clf = CnnClassifier(net, (0, *img_shape), num_classes, lr, wd)
    op = operations[operation]
    # NOTE(review): training batches are built with shuffle=False — confirm intended.
    train_batches = BatchGenerator(train_data, 128, False, op)
    val_batches = BatchGenerator(val_data, 128, False, op)
    # Early-stopping state: epochs since last improvement, best metrics so far.
    not_improved_since = 0
    best_accuracy = 0
    best_loss = 0
    stop_epoch = 0
    for epoch in range(NR_EPOCHS):
        print("Epoch {}/{}".format(epoch, NR_EPOCHS), end="\r")
        # One pass over the training set, collecting per-batch losses.
        losses = []
        for batch in train_batches:
            loss = clf.train(batch.data, batch.label)
            losses.append(loss)
        losses = np.array(losses)
        mean = round(np.mean(losses), 3)
        # NOTE(review): `std` is computed but never used afterwards.
        std = round(np.std(losses), 3)
        # Evaluate on the validation set.
        accuracy = Accuracy()
        for batch in val_batches:
            predictions = clf.predict(batch.data)
            accuracy.update(predictions, batch.label)
        acc = round(accuracy.accuracy(), 3)
        # Early stopping
        if acc > best_accuracy:
            stop_epoch = epoch
            not_improved_since = 0
            best_accuracy = acc
            best_loss = mean
        else:
            not_improved_since += 1
        if not_improved_since > EARLY_STOPPING:
            # if not improved since 5 epochs stop training
            break
    print()
    print("Best val accuracy after epoch {}".format(stop_epoch + 1))
    print("Validation Accuracy: {}".format(best_accuracy))
    print("Train Loss: {}".format(best_loss))
    # Append a summary of this run to the shared results file.
    with open(RESULTS_FILE, "a") as file:
        file.write("Trained a network with:\n")
        file.write("Weight Decay = {}\n".format(wd))
        file.write("Augmentation = {}\n".format(operation))
        file.write("Learning Rate = {}\n".format(lr))
        file.write("---\n")
        file.write("Best val accuracy after epoch {}\n".format(stop_epoch + 1))
        file.write("Validation Accuracy: {}\n".format(best_accuracy))
        file.write("Train Loss: {}\n".format(best_loss))
        file.write("\n#################################\n")
# make sure the whole pipeline works: # when k=1 and # training and predict subset are equal and # kNN must have accuracy 100% start = time.time() pets = PetsDataset( '/Users/mmatak/dev/college/DLVC/cifar-10/cifar-10-batches-py/', Subset.TEST) num_classes = 2 k = 1 knn = KnnClassifier(k, 32 * 32 * 3, num_classes) batchGenerator = BatchGenerator(pets, 512, False, op=chain( [type_cast(dtype=np.float32), vectorize()])) groundTruthLabels = None for batch in batchGenerator: knn.train(batch.data, batch.label) groundTruthLabels = batch.label predictedLabels = None def measure_accuracy(predictedLabels: np.ndarray, groundTruthLabels: np.ndarray): correct = 0 for index, trueLabel in enumerate(groundTruthLabels):
# TODO implement steps 1-2
data_path = ""  # should point at a directory ending with "...\\cifar-10-batches.py"
trainingDataset = PetsDataset(data_path, Subset.TRAINING)
validationDataset = PetsDataset(data_path, Subset.VALIDATION)
testDataset = PetsDataset(data_path, Subset.TEST)

# Shared preprocessing: flatten, cast to float32, rescale pixels to [-1, 1].
op = chain([
    vectorize(),
    type_cast(np.float32),
    add(-127.5),
    mul(1 / 127.5),
])

# Batch generators (batch size 32, shuffled) for all three splits.
bg_training = BatchGenerator(dataset=trainingDataset, num=32, shuffle=True, op=op)
bg_validation = BatchGenerator(dataset=validationDataset, num=32, shuffle=True, op=op)
bg_test = BatchGenerator(dataset=testDataset, num=32, shuffle=True, op=op)


def random_search(lr_max=1, lr_min=0.9, momentum_max=1, momentum_min=0.9):
    """Draw a (learning rate, momentum) pair uniformly from the given ranges."""
    sampled_lr = (lr_max - lr_min) * np.random.random_sample() + lr_min
    sampled_momentum = (momentum_max - momentum_min) * np.random.random_sample() + momentum_min
    return (sampled_lr, sampled_momentum)
# Accuracy trackers for the random baseline, validation, and training runs.
random_accuracy = Accuracy()
validation_accuracy = Accuracy()
train_accuracy = Accuracy()

# Sanity prints over the training dataset (defined earlier in the file).
print('Number of Classes = {}'.format(pets_train.num_classes()))
print('Number of Images = {}'.format(pets_train.__len__()))
print('First 10 Classes >>> {}'.format(pets_train.labels[:10]))

# Preprocessing: flatten, cast to float32, rescale pixels to [-1, 1].
op = ops.chain([
    ops.vectorize(),
    ops.type_cast(np.float32),
    ops.add(-127.5),
    ops.mul(1 / 127.5),
])

train_batches = BatchGenerator(pets_train, 100, False, op)
validation_batches = BatchGenerator(pets_val, 100, False, op)
print('Number of Batches = {}'.format(train_batches.__len__()))

model = LinearClassifier(3072, pets_train.num_classes(), lr=0.001,
                         momentum=0.1, nesterov=True)

# test batch generator
# NOTE(review): this loop is truncated in the excerpt; the final print
# statement continues beyond view.
for batch in train_batches:
    print('Shape of the data batch: {}'.format(batch.data.shape))
    print('Shape of the label batch: {}'.format(batch.label.shape))
    print('First 5 Elements of the first element: {}'.format(
# (model, accuracy) pair returned by train_model below.
TrainedModel = namedtuple('TrainedModel', ['model', 'accuracy'])

train_data = PetsDataset("../cifar-10-batches-py/", Subset.TRAINING)
val_data = PetsDataset("../cifar-10-batches-py/", Subset.VALIDATION)
test_data = PetsDataset('../cifar-10-batches-py/', Subset.TEST)

# Preprocessing: flatten, cast to float32, rescale pixels to [-1, 1].
op = ops.chain([
    ops.vectorize(),
    ops.type_cast(np.float32),
    ops.add(-127.5),
    ops.mul(1/127.5),
])

# Unshuffled batches of 50 samples for each split.
train_batches = BatchGenerator(train_data, 50, False, op)
val_batches = BatchGenerator(val_data, 50, False, op)
test_batches = BatchGenerator(test_data, 50, False, op)


def train_model(lr: float, momentum: float) -> TrainedModel:
    '''
    Trains a linear classifier with a given learning rate (lr) and momentum.
    Computes the accuracy on the validation set.
    Returns both the trained classifier and accuracy.
    '''
    clf = LinearClassifier(input_dim=3072, num_classes=train_data.num_classes(),
                           lr=lr, momentum=momentum, nesterov=False)
    n_epochs = 10
    # NOTE(review): the nested training loop continues beyond this excerpt.
    for i in range(n_epochs):
        for batch in train_batches:
rcrop(25, 2, 'median'), resize(input_size, pad_mode_for_resizing), hwc2chw() ]) else: net = CatDogNet() op_chain = chain([ type_cast(dtype=np.float32), add(-127.5), mul(1 / 127.5), rcrop(25, 2, 'median'), hwc2chw() ]) batchGenerator_training = BatchGenerator(pets_training, BATCH_SIZE, shuffle=True, op=op_chain) batchGenerator_validation = BatchGenerator(pets_validation, BATCH_SIZE, shuffle=False, op=op_chain) clf = CnnClassifier(net, (BATCH_SIZE, NUM_CHANNELS, IMAGE_HEIGHT, IMAGE_WIDTH), NUM_CLASSES, lr, wd) loss_list = [] best_accuracy = 0.0 accuracy = Accuracy() epochs_since_best_accuracy = 0 for epoch in range(0, EPOCHS): print("Epoche: ", epoch + 1)
ops.add(-127.5), ops.mul(1 / 127.5), ops.hflip(), ops.rcrop(32, 4, 'constant'), ops.add_noise(), ops.hwc2chw() ]) reverse_op = ops.chain([ ops.chw2hwc(), ops.mul(127.5), ops.add(127.5), ops.type_cast(np.uint8), ]) train_batches = BatchGenerator(pets_train, 100, False, op) class Net(nn.Module): """ """ def __init__(self, num_classes): super(Net, self).__init__() # Instantiate the ReLU nonlinearity self.relu = nn.ReLU() # Instantiate two convolutional layers self.conv1 = nn.Conv2d(in_channels=3, out_channels=5, kernel_size=3,