def __init__(self, hparams):
  self.hparams = hparams
  self.epochs = 0
  self.curr_train_index = 0

  # Build the fixed perturbation direction used for the noise experiments.
  if self.hparams.noise_type == 'fourier':
    i, j = freq_helpers.get_spatial_freqij(self.hparams.spatial_frequency)
    self.direction = freq_helpers.get_fourier_basis_image(i, j)
  elif self.hparams.noise_type == 'random':
    np.random.seed(hparams.noise_seed)
    self.direction = np.random.randn(32 * 32 * 3).reshape(32, 32, 3)
  elif self.hparams.noise_type == 'f' or self.hparams.noise_type == '1/f':
    self.direction = freq_helpers.get_fourier_composite_image(
        kind=self.hparams.noise_type)

  self.good_policies = found_policies.good_policies()

  (all_images, all_labels, test_images, test_labels,
   extra_test_images, extra_test_labels) = load_cifar(hparams)
  self.test_images, self.test_labels = test_images, test_labels
  self.extra_test_images, self.extra_test_labels = (extra_test_images,
                                                    extra_test_labels)

  # Shuffle the data
  all_images = all_images[:]
  all_labels = all_labels[:]
  tf.logging.info('all_images size: {}'.format(all_images.shape))
  np.random.seed(0)
  perm = np.arange(len(all_images))
  np.random.shuffle(perm)
  all_images = all_images[perm]
  all_labels = all_labels[perm]

  # Break into train and val
  train_size, val_size = hparams.train_size, hparams.validation_size
  assert 50000 >= train_size + val_size
  self.train_images = all_images[:train_size]
  self.train_labels = all_labels[:train_size]
  self.val_images = all_images[train_size:train_size + val_size]
  self.val_labels = all_labels[train_size:train_size + val_size]
  self.num_train = self.train_images.shape[0]
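The constructor above only reads a handful of hyperparameter fields before delegating the actual loading to load_cifar. The sketch below is a hypothetical usage example, not part of the original listing: the class name DataSet and the use of tf.contrib.training.HParams are assumptions; only the field names are taken from the code above.

# Hypothetical usage sketch: `DataSet` and the exact hparams construction are
# assumptions; only the field names below appear in the constructor above.
import tensorflow as tf

hparams = tf.contrib.training.HParams(
    noise_type='fourier',    # one of 'fourier', 'random', 'f', '1/f'
    spatial_frequency=4,     # read only when noise_type == 'fourier'
    noise_seed=0,            # read only when noise_type == 'random'
    train_size=45000,
    validation_size=5000)    # load_cifar(hparams) may need further fields

dataset = DataSet(hparams)   # assumed class name for the constructor above
print(dataset.train_images.shape, dataset.val_images.shape)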
def __init__(self, hparams):
  self.hparams = hparams
  self.epochs = 0
  self.curr_train_index = 0
  all_labels = []

  self.good_policies = found_policies.good_policies()

  # Determine how many data batches to load
  num_data_batches_to_load = 5
  total_batches_to_load = num_data_batches_to_load
  train_batches_to_load = total_batches_to_load
  assert hparams.train_size + hparams.validation_size <= 50000
  if hparams.eval_test:
    total_batches_to_load += 1

  # Determine how many images we have loaded
  total_dataset_size = 10000 * num_data_batches_to_load
  train_dataset_size = total_dataset_size
  if hparams.eval_test:
    total_dataset_size += 10000

  if hparams.dataset == 'cifar10':
    all_data = np.empty((total_batches_to_load, 10000, 3072), dtype=np.uint8)
  elif hparams.dataset == 'cifar100':
    assert num_data_batches_to_load == 5
    all_data = np.empty((1, 50000, 3072), dtype=np.uint8)
    if hparams.eval_test:
      test_data = np.empty((1, 10000, 3072), dtype=np.uint8)

  if hparams.dataset == 'cifar10':
    tf.logging.info('Cifar10')
    datafiles = [
        'data_batch_1', 'data_batch_2', 'data_batch_3', 'data_batch_4',
        'data_batch_5'
    ]
    datafiles = datafiles[:train_batches_to_load]
    if hparams.eval_test:
      datafiles.append('test_batch')
    num_classes = 10
  elif hparams.dataset == 'cifar100':
    datafiles = ['train']
    if hparams.eval_test:
      datafiles.append('test')
    num_classes = 100
  else:
    raise NotImplementedError('Unimplemented dataset: ', hparams.dataset)

  if hparams.dataset != 'test':
    for file_num, f in enumerate(datafiles):
      d = unpickle(os.path.join(hparams.data_path, f))
      if f == 'test':
        test_data[0] = copy.deepcopy(d['data'])
        all_data = np.concatenate([all_data, test_data], axis=1)
      else:
        all_data[file_num] = copy.deepcopy(d['data'])
      if hparams.dataset == 'cifar10':
        labels = np.array(d['labels'])
      else:
        labels = np.array(d['fine_labels'])
      nsamples = len(labels)
      for idx in range(nsamples):
        all_labels.append(labels[idx])

  all_data = all_data.reshape(total_dataset_size, 3072)
  all_data = all_data.reshape(-1, 3, 32, 32)
  all_data = all_data.transpose(0, 2, 3, 1).copy()
  all_data = all_data / 255.0
  mean = augmentation_transforms.MEANS
  std = augmentation_transforms.STDS
  tf.logging.info('mean:{} std: {}'.format(mean, std))
  all_data = (all_data - mean) / std
  all_labels = np.eye(num_classes)[np.array(all_labels, dtype=np.int32)]

  assert len(all_data) == len(all_labels)
  tf.logging.info(
      'In CIFAR10 loader, number of images: {}'.format(len(all_data)))

  # Break off test data
  if hparams.eval_test:
    self.test_images = all_data[train_dataset_size:]
    self.test_labels = all_labels[train_dataset_size:]

  # Shuffle the rest of the data
  all_data = all_data[:train_dataset_size]
  all_labels = all_labels[:train_dataset_size]
  np.random.seed(0)
  perm = np.arange(len(all_data))
  np.random.shuffle(perm)
  all_data = all_data[perm]
  all_labels = all_labels[perm]

  # Break into train and val
  train_size, val_size = hparams.train_size, hparams.validation_size
  assert 50000 >= train_size + val_size
  self.train_images = all_data[:train_size]
  self.train_labels = all_labels[:train_size]
  self.val_images = all_data[train_size:train_size + val_size]
  self.val_labels = all_labels[train_size:train_size + val_size]
  self.num_train = self.train_images.shape[0]
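The loading loop above relies on an unpickle helper whose definition is not shown in this listing. Below is a minimal sketch consistent with the standard CIFAR python pickles; under Python 3 with encoding='bytes' the dictionary keys come back as bytes (d[b'data'], as in the later variant), whereas under Python 2 they are plain strings (d['data'], as used here).

# Sketch of the assumed `unpickle` helper; the original definition is not shown.
import pickle


def unpickle(filename):
  # CIFAR batches are pickled dicts with 'data' and 'labels'/'fine_labels' keys.
  with open(filename, 'rb') as fo:
    return pickle.load(fo, encoding='bytes')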
def __init__(self, hparams):
  self.hparams = hparams
  self.epochs = 0
  self.curr_train_index = 0
  all_labels = []

  self.good_policies = found_policies.good_policies()

  # Determine how many data batches to load
  num_data_batches_to_load = 5
  total_batches_to_load = num_data_batches_to_load  # 5
  train_batches_to_load = total_batches_to_load  # 5
  assert hparams.train_size + hparams.validation_size <= 50000
  if hparams.eval_test:
    total_batches_to_load += 1  # 6

  # Determine how many images we have loaded
  total_dataset_size = 10000 * num_data_batches_to_load  # 10000 * 5; 50000
  train_dataset_size = total_dataset_size  # 50000
  if hparams.eval_test:
    total_dataset_size += 10000  # 60000

  if hparams.dataset == 'cifar10':
    all_data = np.empty((total_batches_to_load, 10000, 3072),
                        dtype=np.uint8)  # (6, 10000, 3072)
  elif hparams.dataset == 'cifar100':
    assert num_data_batches_to_load == 5
    all_data = np.empty((1, 50000, 3072), dtype=np.uint8)
    if hparams.eval_test:
      test_data = np.empty((1, 10000, 3072), dtype=np.uint8)
  # my edits
  elif hparams.dataset in ['cifar10_30k', 'cifar102_30k']:
    num_data_batches_to_load = 1
    total_batches_to_load = num_data_batches_to_load  # 1
    train_batches_to_load = total_batches_to_load  # 1
    if hparams.eval_test:
      total_batches_to_load += 1  # 2
    total_dataset_size = 24000 * num_data_batches_to_load  # 24000
    train_dataset_size = total_dataset_size  # 24000; assign old value before the increment
    if hparams.eval_test:
      total_dataset_size += 6000  # 30000
    all_data = np.empty((1, 24000, 3072), dtype=np.uint8)
    test_data = np.empty((1, 6000, 3072), dtype=np.uint8)
  elif hparams.dataset in ['cifar10_10k', 'cifar102']:
    # this cifar102 is a 10,000 example data set
    num_data_batches_to_load = 1
    total_batches_to_load = num_data_batches_to_load  # 1
    train_batches_to_load = total_batches_to_load  # 1
    if hparams.eval_test:
      total_batches_to_load += 1  # 2
    total_dataset_size = 8000 * num_data_batches_to_load  # 8000
    train_dataset_size = total_dataset_size  # 8000; assign old value before the increment
    if hparams.eval_test:
      total_dataset_size += 2000  # 10000
    all_data = np.empty((1, 8000, 3072), dtype=np.uint8)
    test_data = np.empty((1, 2000, 3072), dtype=np.uint8)
  elif hparams.dataset in ['cifar10_12k', 'cifar105']:
    num_data_batches_to_load = 1
    total_batches_to_load = num_data_batches_to_load  # 1
    train_batches_to_load = total_batches_to_load  # 1
    if hparams.eval_test:
      total_batches_to_load += 1  # 2
    total_dataset_size = 10000 * num_data_batches_to_load  # 10000
    train_dataset_size = total_dataset_size  # 10000; assign old value before the increment
    if hparams.eval_test:
      total_dataset_size += 2000  # 12000
    all_data = np.empty((1, 10000, 3072), dtype=np.uint8)
    test_data = np.empty((1, 2000, 3072), dtype=np.uint8)
  # end my edits

  if hparams.dataset == 'cifar10':
    tf.logging.info('Cifar10')
    datafiles = [
        'data_batch_1', 'data_batch_2', 'data_batch_3', 'data_batch_4',
        'data_batch_5'
    ]
    datafiles = datafiles[:train_batches_to_load]
    if hparams.eval_test:
      datafiles.append('test_batch')
    num_classes = 10
  elif hparams.dataset == 'cifar100':
    datafiles = ['train']
    if hparams.eval_test:
      datafiles.append('test')
    num_classes = 100
  # my edits
  elif hparams.dataset == 'cifar10_10k':
    datafiles = ['cifar10_10k_train_py27']
    if hparams.eval_test:
      datafiles.append('cifar10_10k_test_py27')
    num_classes = 10
  elif hparams.dataset == 'cifar10_30k':
    datafiles = ['cifar10_30k_train_py27']
    if hparams.eval_test:
      datafiles.append('cifar10_30k_test_py27')
    num_classes = 10
  elif hparams.dataset == 'cifar102_30k':
    datafiles = ['cifar102_30k_train_py27']
    if hparams.eval_test:
      datafiles.append('cifar102_30k_test_py27')
    num_classes = 10
  elif hparams.dataset == 'cifar102':
    datafiles = ['cifar102_min_overlap_train_v4_py27']
    if hparams.eval_test:
      datafiles.append('cifar102_min_overlap_test_v4_py27')
    num_classes = 10
  elif hparams.dataset == 'cifar10_12k':
    datafiles = ['cifar10_12k_train']
    if hparams.eval_test:
      datafiles.append('cifar10_12k_test')
    num_classes = 10
  elif hparams.dataset == 'cifar105':
    datafiles = ['cifar105_train']
    if hparams.eval_test:
      datafiles.append('cifar105_test')
    num_classes = 10
  # end my edits
  else:
    raise NotImplementedError('Unimplemented dataset: ', hparams.dataset)

  if hparams.dataset != 'test':
    for file_num, f in enumerate(datafiles):
      # my notes:
      # first iteration will be a training set
      # second will be a test set
      d = unpickle(os.path.join(hparams.data_path, f))
      if f == 'test':
        # my note: this is associated with cifar100; we will use it for our
        # setup as well
        test_data[0] = copy.deepcopy(d[b'data'])
        all_data = np.concatenate([all_data, test_data], axis=1)
      # my edit:
      elif f in [
          'cifar10_10k_test_py27', 'cifar10_30k_test_py27',
          'cifar102_min_overlap_test_v4_py27', 'cifar102_30k_test_py27',
          'cifar105_test', 'cifar10_12k_test'
      ]:
        # test set is always the second iteration; so we can now do this...
        test_data[0] = copy.deepcopy(d[b'data'])
        all_data = np.concatenate([all_data, test_data], axis=1)
      # end my edit
      else:
        # my note: this will occur during the first iteration; assigns
        # training data to all_data
        # troubleshoot
        # print(len(d[b'data']))
        all_data[file_num] = copy.deepcopy(d[b'data'])
      if hparams.dataset == 'cifar10':
        labels = np.array(d[b'labels'])
      # my edit
      elif hparams.dataset in [
          'cifar10', 'cifar10_10k', 'cifar10_30k', 'cifar102', 'cifar102_30k',
          'cifar10_12k', 'cifar105'
      ]:
        labels = np.array(d[b'labels'])
      # end my edit
      else:
        labels = np.array(d[b'fine_labels'])
      nsamples = len(labels)
      for idx in range(nsamples):
        all_labels.append(labels[idx])

  # train and test batches are now together
  all_data = all_data.reshape(total_dataset_size, 3072)
  all_data = all_data.reshape(-1, 3, 32, 32)
  all_data = all_data.transpose(0, 2, 3, 1).copy()
  all_data = all_data / 255.0

  # my edit
  if hparams.dataset in [
      'cifar10', 'cifar10_10k', 'cifar10_30k', 'cifar102', 'cifar102_30k'
  ]:
    # match format for mean/std calc as noted in augmentation_transforms.py
    mean = np.mean(all_data[:train_dataset_size],
                   axis=(0, 1, 2))  # training set channel means
    std = np.std(all_data[:train_dataset_size],
                 axis=(0, 1, 2))  # training set channel stds
  else:
    mean = augmentation_transforms.MEANS
    std = augmentation_transforms.STDS
  # end my edit
  # original code
  # mean = augmentation_transforms.MEANS
  # std = augmentation_transforms.STDS
  tf.logging.info('mean:{} std: {}'.format(mean, std))
  all_data = (all_data - mean) / std
  all_labels = np.eye(num_classes)[np.array(all_labels, dtype=np.int32)]
  assert len(all_data) == len(all_labels)
  tf.logging.info(
      'In CIFAR10 loader, number of images: {}'.format(len(all_data)))

  # Break off test data
  if hparams.eval_test:
    self.test_images = all_data[train_dataset_size:]
    self.test_labels = all_labels[train_dataset_size:]

  # Shuffle the rest of the data
  all_data = all_data[:train_dataset_size]
  all_labels = all_labels[:train_dataset_size]
  np.random.seed(0)
  perm = np.arange(len(all_data))
  np.random.shuffle(perm)
  all_data = all_data[perm]
  all_labels = all_labels[perm]

  # Break into train and val
  train_size, val_size = hparams.train_size, hparams.validation_size
  # my edits
  # keep functionality of original code; not really an issue since the default
  # hyperparameter for validation_size is 0
  if hparams.dataset in ['cifar10', 'cifar100']:
    assert 50000 >= train_size + val_size
  # end my edits
  # original code
  # assert 50000 >= train_size + val_size
  self.train_images = all_data[:train_size]
  self.train_labels = all_labels[:train_size]
  self.val_images = all_data[train_size:train_size + val_size]
  self.val_labels = all_labels[train_size:train_size + val_size]
  self.num_train = self.train_images.shape[0]
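The per-channel normalization added in the custom-dataset branch computes means and standard deviations over the training portion only and then standardizes the whole array (train and test rows) with those statistics. The standalone numpy snippet below, with made-up shapes, illustrates that behaviour; it is not part of the loader.

# Standalone illustration of the per-channel normalization above (toy shapes).
import numpy as np

rng = np.random.RandomState(0)
images = rng.rand(100, 32, 32, 3)   # stand-in for all_data after / 255.0
train_dataset_size = 80             # stand-in for the training split size

mean = np.mean(images[:train_dataset_size], axis=(0, 1, 2))  # per-channel, shape (3,)
std = np.std(images[:train_dataset_size], axis=(0, 1, 2))    # per-channel, shape (3,)
normalized = (images - mean) / std  # test rows use the training statistics

print(mean.shape, std.shape, normalized.shape)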
def __init__(self, hparams):
  self.hparams = hparams
  self.epochs = 0
  self.curr_train_index = 0
  all_labels = []

  self.good_policies = found_policies.good_policies()

  total_dataset_size = 50000
  train_dataset_size = 50000
  if hparams.eval_test:
    total_dataset_size += 10000

  all_data = np.empty((1, 50000, 3072), dtype=np.uint8)
  if hparams.eval_test:
    test_data = np.empty((1, 10000, 3072), dtype=np.uint8)

  datafiles = ['train']  # 'train' is the filename of train dataset
  if hparams.eval_test:
    datafiles.append('test')  # 'test' is the filename of test dataset
  num_classes = 100  # fine labels

  # Loading train and test dataset
  for file_num, f in enumerate(datafiles):
    d = unpickle(os.path.join(hparams.data_path, f))
    if f == 'test':
      test_data[0] = copy.deepcopy(d['data'])
      all_data = np.concatenate([all_data, test_data], axis=1)
    else:
      all_data[file_num] = copy.deepcopy(d['data'])
    labels = np.array(d['fine_labels'])
    nsamples = len(labels)
    for idx in range(nsamples):
      all_labels.append(labels[idx])

  # Data processing
  all_data = all_data.reshape(total_dataset_size, 3072)
  all_data = all_data.reshape(-1, 3, 32, 32)
  all_data = all_data.transpose(0, 2, 3, 1).copy()
  all_data = all_data / 255.0
  mean = augmentation_transforms.MEANS
  std = augmentation_transforms.STDS
  tf.logging.info('mean:{} std: {}'.format(mean, std))
  all_data = (all_data - mean) / std
  all_labels = np.eye(num_classes)[np.array(all_labels, dtype=np.int32)]

  assert len(all_data) == len(all_labels)
  tf.logging.info(
      'In CIFAR100 loader, number of images: {}'.format(len(all_data)))

  # Break off test data
  if hparams.eval_test:
    self.test_images = all_data[train_dataset_size:]
    self.test_labels = all_labels[train_dataset_size:]

  # Shuffle the rest of the data
  all_data = all_data[:train_dataset_size]
  all_labels = all_labels[:train_dataset_size]
  np.random.seed(0)
  perm = np.arange(len(all_data))
  np.random.shuffle(perm)
  all_data = all_data[perm]
  all_labels = all_labels[perm]

  train_size, val_size = hparams.train_size, hparams.validation_size
  self.train_images = all_data[:train_size]
  self.train_labels = all_labels[:train_size]
  self.num_train = self.train_images.shape[0]
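All of the loaders above one-hot encode the labels with the same np.eye indexing idiom. The short standalone example below (not from the original code) shows what that indexing produces.

# Standalone demonstration of the one-hot encoding idiom used by the loaders.
import numpy as np

num_classes = 100
labels = np.array([3, 0, 99], dtype=np.int32)
one_hot = np.eye(num_classes)[labels]  # shape (3, 100), one 1.0 per row

print(one_hot.shape)         # (3, 100)
print(one_hot[0].argmax())   # 3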