def get_data(self, nb_points=500):
    """Get pre-processed 64x64 training and testing data.

    Because this method is for testing it takes as input the number of
    datapoints, nb_points, to be included in the training and testing set.

    :param nb_points: Number of data points to be included in each set
    :type nb_points: `int`
    :return: training data
    :rtype: `tuple`
    :raises IOError: if ``self.file_name`` is set but cannot be loaded
    """
    # NOTE(review): num_classes == 1 means np.eye(1)[y] only works when
    # every label is 0 — confirm this is intentional for this dataset.
    num_classes = 1
    img_rows, img_cols = 64, 64

    if self.file_name is None:
        (x_train, y_train), (x_test, y_test) = load_mnist()
        # Reduce datapoints to make test faster
        x_train = x_train[:nb_points]
        y_train = y_train[:nb_points]
        x_test = x_test[:nb_points]
        y_test = y_test[:nb_points]
    else:
        try:
            # Keep the try body minimal: only the load itself can raise.
            data_train = np.load(self.file_name)
            x_train = data_train['x_train']
            y_train = data_train['y_train']
            x_test = data_train['x_test']
            y_test = data_train['y_test']
        except Exception:
            raise IOError('Unable to load training data from path '
                          'provided in config file: ' + self.file_name)
        # Log success only after the archive has actually been read
        # (previously this was logged before the load even started).
        logger.info('Loaded training data from ' + str(self.file_name))

    # Reshape to NCHW or NHWC depending on the configured data format.
    if self.channels_first:
        x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
        x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    else:
        x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
        x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)

    print('x_train shape:', x_train.shape)
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    # Convert class vectors to binary class matrices (one-hot encoding).
    y_train = np.eye(num_classes)[y_train]
    y_test = np.eye(num_classes)[y_test]

    return (x_train, y_train), (x_test, y_test)
def get_data(self, nb_points=500):
    """Get pre-processed MNIST training and testing data.

    Because this method is for testing it takes as input the number of
    datapoints, nb_points, to be included in the training and testing set.

    :param nb_points: Number of data points to be included in each set
    :type nb_points: `int`
    :return: training data
    :rtype: `tuple`
    :raises IOError: if ``self.file_name`` is set but cannot be loaded
    """
    if self.file_name is None:
        (x_train, y_train), (x_test, y_test) = load_mnist()
        # Reduce datapoints to make test faster
        x_train = x_train[:nb_points]
        y_train = y_train[:nb_points]
        x_test = x_test[:nb_points]
        y_test = y_test[:nb_points]
    else:
        try:
            # Read all four arrays from the configured npz archive.
            # Previously the archive was loaded and then discarded, and a
            # hard-coded *train* pickle was read for BOTH the train and
            # the test split, so the test set silently duplicated the
            # training set.
            data_train = np.load(self.file_name)
            x_train = data_train['x_train']
            y_train = data_train['y_train']
            x_test = data_train['x_test']
            y_test = data_train['y_test']
        except Exception:
            raise IOError('Unable to load training data from path '
                          'provided in config file: ' + self.file_name)
        logger.info('Loaded training data from ' + str(self.file_name))

    # Add a channels dimension; np.newaxis avoids importing TensorFlow
    # solely for tf.newaxis.
    x_train = x_train[..., np.newaxis]
    x_test = x_test[..., np.newaxis]

    print('x_train shape:', x_train.shape)
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    return (x_train, y_train), (x_test, y_test)
def __init__(self, data_config):
    """Prepare augmented training and plain testing MNIST generators.

    :param data_config: data handler configuration (passed by the caller;
        not read here beyond interface compatibility)
    """
    super().__init__()
    (X_train, y_train), (X_test, y_test) = load_mnist()

    # Reshape to NHWC and scale pixel values into [0, 1].
    X_train = X_train.reshape(X_train.shape[0], 28, 28, 1)
    X_train = X_train.astype('float32')
    X_train /= 255
    X_test = X_test.reshape(X_test.shape[0], 28, 28, 1)
    X_test = X_test.astype('float32')
    X_test /= 255

    # One-hot encode the labels.
    y_train = np_utils.to_categorical(y_train)
    y_test = np_utils.to_categorical(y_test)

    # Augmentation is applied to the training data only.
    train_gen = ImageDataGenerator(rotation_range=8,
                                   width_shift_range=0.08,
                                   shear_range=0.3,
                                   height_shift_range=0.08,
                                   zoom_range=0.08)
    test_gen = ImageDataGenerator()

    self.train_datagenerator = train_gen.flow(
        X_train, y_train, batch_size=64)
    # Bug fix: the test generator previously flowed through train_gen,
    # applying training-time augmentation to the evaluation data.
    self.test_datagenerator = test_gen.flow(X_test, y_test, batch_size=64)
def save_mnist_party_data(nb_dp_per_party, should_stratify, party_folder):
    """Saves MNIST party data

    :param nb_dp_per_party: the number of data points each party should have
    :type nb_dp_per_party: `list[int]`
    :param should_stratify: True if data should be assigned proportional to
        source class distributions
    :type should_stratify: `bool`
    :param party_folder: folder to save party data
    :type party_folder: `str`
    """
    dataset_path = os.path.join("backend", "datasets")
    if not os.path.exists(dataset_path):
        os.makedirs(dataset_path)

    (x_train, y_train), (x_test, y_test) = \
        load_mnist(download_dir=dataset_path)
    labels, train_counts = np.unique(y_train, return_counts=True)
    te_labels, test_counts = np.unique(y_test, return_counts=True)
    # Warn when some train labels are MISSING from the test labels.
    # The original condition was inverted: it warned on the healthy case
    # where every train label also appears in the test set.
    if not np.all(np.isin(labels, te_labels)):
        print("Warning: test set and train set contain different labels")

    num_train = np.shape(y_train)[0]
    num_test = np.shape(y_test)[0]
    num_labels = np.shape(np.unique(y_test))[0]
    nb_parties = len(nb_dp_per_party)

    if should_stratify:
        # Sample according to source label distribution.
        # NOTE(review): indexing train_counts by label value assumes the
        # labels are exactly 0..k-1 (true for MNIST) — confirm if reused.
        train_probs = {label: train_counts[label] / float(num_train)
                       for label in labels}
        test_probs = {label: test_counts[label] / float(num_test)
                      for label in te_labels}
    else:
        # Sample uniformly
        train_probs = {label: 1.0 / len(labels) for label in labels}
        test_probs = {label: 1.0 / len(te_labels) for label in te_labels}

    for idx, dp in enumerate(nb_dp_per_party):
        # Use a distinct index name inside the comprehensions so the
        # party index `idx` is not shadowed (readability; Py3 comprehension
        # scope made the original safe but confusing).
        train_p = np.array([train_probs[y_train[i]]
                            for i in range(num_train)])
        train_p /= np.sum(train_p)
        train_indices = np.random.choice(num_train, dp, p=train_p)

        test_p = np.array([test_probs[y_test[i]] for i in range(num_test)])
        test_p /= np.sum(test_p)
        # Split test evenly
        test_indices = np.random.choice(
            num_test, int(num_test / nb_parties), p=test_p)

        x_train_pi = x_train[train_indices]
        y_train_pi = y_train[train_indices]
        x_test_pi = x_test[test_indices]
        y_test_pi = y_test[test_indices]

        # Now put it all in an npz
        name_file = 'data_party' + str(idx) + '.npz'
        name_file = os.path.join(party_folder, name_file)
        np.savez(name_file, x_train=x_train_pi, y_train=y_train_pi,
                 x_test=x_test_pi, y_test=y_test_pi)

        print_statistics(idx, x_test_pi, x_train_pi, num_labels, y_train_pi)

    print('Finished! :) Data saved in ', party_folder)