def process_images(names, out_loc, mean=None, sd=None,
                   dataset_root='/dfs/scratch1/thomasat/datasets/norb'):
    """Load the named NORB subsets, shuffle, normalize, and pickle them.

    Each entry of ``names`` is looked up in ``NORBDataset(...).data`` and run
    through ``process_data``; the resulting X and Y arrays are stacked
    row-wise, shuffled with a shared permutation, normalized, and written to
    ``out_loc`` as a pickled ``{'X': ..., 'Y': ...}`` dict (protocol 2).

    Args:
        names: Iterable of subset names to load from the dataset.
        out_loc: Path of the pickle file to write.
        mean: Normalization mean to apply. When both ``mean`` and ``sd`` are
            None, the statistics are computed from the loaded data via
            ``normalize_data``.
        sd: Normalization standard deviation to apply (see ``mean``).
        dataset_root: Directory passed to ``NORBDataset`` as ``dataset_root``.
            Defaults to the path this function previously hard-coded.

    Returns:
        The ``(mean, sd)`` pair that was used: either the values computed by
        ``normalize_data`` or the ones passed in.
    """
    print('Names: ', names)
    dataset = NORBDataset(dataset_root=dataset_root, names=names)
    print('Dataset names: ', dataset.data.keys())

    Xs = []
    Ys = []
    for name in names:
        X, Y = process_data(dataset.data[name])
        print('X,Y shape: ', X.shape, Y.shape)
        Xs.append(X)
        Ys.append(Y)
    X = np.vstack(Xs)
    Y = np.vstack(Ys)

    # Shuffle: one index permutation (global NumPy RNG) applied to both
    # arrays so rows of X and Y stay aligned.
    idx = np.arange(0, X.shape[0])
    np.random.shuffle(idx)
    X = X[idx, :]
    Y = Y[idx, :]

    # NOTE(review): if exactly one of mean/sd is supplied, the other is passed
    # to apply_normalization as None -- confirm that helper accepts this.
    if mean is None and sd is None:
        X, mean, sd = normalize_data(X)
        print('X, Y: ', X.shape, Y.shape)
    else:
        X = apply_normalization(X, mean, sd)

    # Save: the context manager closes (and flushes) the file even if
    # pickling raises, instead of leaving the handle to the garbage collector.
    data_dict = {'X': X, 'Y': Y}
    with open(out_loc, 'wb') as out_file:
        pkl.dump(data_dict, out_file, protocol=2)
    return mean, sd
enc = OneHotEncoder() Y = enc.fit_transform(Y).todense() return X, Y train_loc = '/dfs/scratch1/thomasat/datasets/convex/convex_train.amat' test_loc = '/dfs/scratch1/thomasat/datasets/convex/50k/convex_test.amat' train_out = '/dfs/scratch1/thomasat/datasets/convex/train_normalized' test_out = '/dfs/scratch1/thomasat/datasets/convex/test_normalized' train_data = np.genfromtxt(train_loc) train_X, train_Y = process_data(train_data) test_data = np.genfromtxt(test_loc) test_X, test_Y = process_data(test_data) # Normalize train_X, mean, sd = normalize_data(train_X) test_X = apply_normalization(test_X, mean, sd) # Save print('test_X, test_Y shape: ', test_X.shape, test_Y.shape) print('train_X, train_Y shape: ', train_X.shape, train_Y.shape) train = {'X': train_X, 'Y': train_Y} test = {'X': test_X, 'Y': test_Y} pkl.dump(train, open(train_out, 'wb'), protocol=2) pkl.dump(test, open(test_out, 'wb'), protocol=2) print('Saved train to: ', train_out) print('Saved test to: ', test_out)