def _min_max_scale(features):
    """Rescale *features* to [0, 1] using its global minimum and maximum.

    The minimum and maximum are taken over the whole array (not per column),
    matching the scaling this module has always applied.

    Args:
        features: NumPy array of numeric values.

    Returns:
        A new array ``(features - min) / (max - min)``. When every value is
        identical (``max == min``) an all-zero float array of the same shape
        is returned instead of dividing by zero.
    """
    lowest, highest = features.min(), features.max()
    span = highest - lowest
    if span == 0:
        # Constant input: 0 / 0 would silently fill the result with NaN.
        return np.zeros_like(features, dtype=float)
    return (features - lowest) / span


def load_data(if_norm=True):
    """Read the test and train CSV files and split them into features/labels.

    The files are read from ``./data/test.csv`` and ``./data/train.csv`` via
    ``read_test`` and ``read_train`` and converted to NumPy arrays. The first
    column of both arrays is dropped (presumably a row id -- confirm against
    the readers). For the training array the last column is taken as the
    label and cast to ``int``; the remaining columns are the features.

    Progress, array shapes and the time spent reading are printed to stdout.

    Args:
        if_norm: When truthy, min-max scale the test and train features to
            [0, 1] before returning them.

    Returns:
        A tuple ``(test_features, train_features, train_labels)``.
    """
    print('Loading data set ...')
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments the way time.time() can.
    started = time.perf_counter()
    test_data = np.array(read_test('./data/test.csv'))
    train_data = np.array(read_train('./data/train.csv'))
    loaded_time = time.perf_counter() - started
    print('test_data: {0}, train_data shape: {1}'.format(
        test_data.shape, train_data.shape))

    train_features = train_data[:, 1:-1]
    train_labels = train_data[:, -1].astype(int)
    test_features = test_data[:, 1:]

    if if_norm:
        # NOTE(review): each set is scaled with its OWN min/max. If the two
        # sets have different value ranges they end up on different scales;
        # consider reusing the training min/max for the test set. Behaviour
        # kept as-is here so existing results stay reproducible.
        test_features = _min_max_scale(test_features)
        train_features = _min_max_scale(train_features)

    print('Data set loaded successfully in {0:.4f} seconds.'.format(
        loaded_time))
    return test_features, train_features, train_labels