def test_normalize_with_scaler(self):
    """Check Normalizer.normalize when given an already fitted MinMaxScaler.

    The data element of the result must match the scaler's own transform
    of the data, and the scaler element must compare equal to the scaler
    that was passed in.
    """
    fitted = MinMaxScaler()
    fitted.fit(self.data)

    expected_data = fitted.transform(self.data)
    returned_data = Normalizer.normalize(self.data, fitted)[1]
    self.assertTrue(np.array_equal(expected_data, returned_data))

    returned_scaler = Normalizer.normalize(self.data, fitted)[0]
    self.assertEqual(fitted, returned_scaler)
def test_standardize_with_scaler(self):
    """Check Normalizer.standardize when given an already fitted StandardScaler.

    The data element of the result must match the scaler's own transform
    of the data, and the scaler element must compare equal to the scaler
    that was passed in.
    """
    fitted = StandardScaler()
    fitted.fit(self.data)

    expected_data = fitted.transform(self.data)
    returned_data = Normalizer.standardize(self.data, fitted)[1]
    self.assertTrue(np.array_equal(expected_data, returned_data))

    returned_scaler = Normalizer.standardize(self.data, fitted)[0]
    self.assertEqual(fitted, returned_scaler)
def __get_training_testing_sets(self, feat_matrix, label_matrix, num_per_class):
    """
    Randomly split the samples into training and testing sets, class by class

    Samples are stored column-wise. Each row of label_matrix is treated as
    one class whose members are the columns holding a 1 in that row. For
    every class, num_per_class member columns are drawn without replacement
    among those whose squared feature sum exceeds 1e-6; the drawn columns
    form the training set and all remaining member columns (including the
    ones excluded from the draw) form the testing set. The features are
    finally normalized according to self.normalizer.

    Args:
        feat_matrix    (np.ndarray): input features
        label_matrix   (np.ndarray): label matrix for input features
        num_per_class         (int): number of training samples from each category

    Return:
        train_feats   (np.ndarray): training features
        train_labels  (np.ndarray): label matrix for training features
        test_feats    (np.ndarray): testing features
        test_labels   (np.ndarray): label matrix for testing features
    """
    assert isinstance(feat_matrix, np.ndarray)
    assert isinstance(label_matrix, np.ndarray)
    assert isinstance(num_per_class, int)

    feat_rows = feat_matrix.shape[0]
    label_rows = label_matrix.shape[0]  # one row per class

    # Every list starts with an empty float block so the stacked result has
    # the right number of rows even when there is no class to iterate over.
    train_feat_blocks = [np.empty((feat_rows, 0))]
    train_label_blocks = [np.empty((label_rows, 0))]
    test_feat_blocks = [np.empty((feat_rows, 0))]
    test_label_blocks = [np.empty((label_rows, 0))]

    for class_row in range(label_rows):
        # columns belonging to the current class
        members = np.flatnonzero(label_matrix[class_row, :] == 1)
        # positions (within members) of columns that are not almost all-zero
        squared_sums = np.sum(feat_matrix[:, members] ** 2, axis=0)
        candidates = np.flatnonzero(squared_sums > 1e-6)

        picked = members[np.random.choice(candidates, num_per_class, replace=False)]
        left_out = np.setdiff1d(members, picked)

        test_feat_blocks.append(feat_matrix[:, left_out])
        test_label_blocks.append(label_matrix[:, left_out])
        train_feat_blocks.append(feat_matrix[:, picked])
        train_label_blocks.append(label_matrix[:, picked])

    test_feats = np.hstack(test_feat_blocks)
    test_labels = np.hstack(test_label_blocks)
    train_feats = np.hstack(train_feat_blocks)
    train_labels = np.hstack(train_label_blocks)

    normalize = Normalizer()
    if self.normalizer in Normalizer.CHOICES[:4]:
        # sample normalization: each split is processed on its own
        train_feats = normalize(self.normalizer, data=train_feats.T).T
        test_feats = Normalizer()(self.normalizer, data=test_feats.T).T
    else:
        # feature scaling: the scaler returned for the training features
        # is handed over when processing the testing features
        scaler, scaled_train = normalize(self.normalizer, data=train_feats.T)
        train_feats = scaled_train.T
        scaled_test = Normalizer()(
            self.normalizer, data=test_feats.T, fitted_scaler=scaler)[1]
        test_feats = scaled_test.T

    return train_feats, train_labels, test_feats, test_labels
def test_get_normalizer_normalize_with_scaler(self):
    """Check get_normalizer(NORMALIZE) against Normalizer.normalize with a fitted scaler.

    Both calls receive the same fitted MinMaxScaler and must return equal
    data (element 1) and equal scalers (element 0).
    """
    scaler = MinMaxScaler()
    scaler.fit(self.data)

    direct = Normalizer.normalize(self.data, fitted_scaler=scaler)
    dispatched = Normalizer.get_normalizer(
        Normalizer.NORMALIZE, data=self.data, fitted_scaler=scaler)

    self.assertTrue(np.array_equal(direct[1], dispatched[1]))
    # assertTrue(a, b) would treat b as the failure message and never
    # compare the two scalers; assertEqual performs the real comparison.
    self.assertEqual(direct[0], dispatched[0])
def test_get_normalizer_standardize_with_scaler(self):
    """Check get_normalizer(STANDARDIZE) against Normalizer.standardize with a fitted scaler.

    Both calls receive the same fitted StandardScaler and must return equal
    data (element 1) and equal scalers (element 0).
    """
    scaler = StandardScaler()
    scaler.fit(self.data)

    direct = Normalizer.standardize(self.data, fitted_scaler=scaler)
    dispatched = Normalizer.get_normalizer(
        Normalizer.STANDARDIZE, data=self.data, fitted_scaler=scaler)

    self.assertTrue(np.array_equal(direct[1], dispatched[1]))
    # assertTrue(a, b) would treat b as the failure message and never
    # compare the two scalers; assertEqual performs the real comparison.
    self.assertEqual(direct[0], dispatched[0])
def __init__(self, normalizer=Normalizer.NONE):
    """
    Load, normalize and sort the training and testing datasets

    Both datasets are read from the JSON files configured in settings,
    converted through self.to_numpy, normalized, ordered through
    self.sort_dataset and their labels cast to float64. With a feature
    scaling option, the scaler returned for the training codes is passed
    on when processing the testing codes.

    Args:
        normalizer (Normalizer option): Normalization to apply
    """
    # ---- training dataset ------------------------------------------------
    with open(settings.TRAINING_DATA_DIRECTORY_DATASET_PATH, 'r') as handle:
        self.training_data = json.load(handle)
    self.to_numpy(self.training_data)
    train = self.training_data

    sample_wise = normalizer in Normalizer.CHOICES[:4]

    if sample_wise:
        # sample normalization
        train['codes'] = Normalizer()(normalizer, data=train['codes'].T).T
    else:
        # feature scaling
        scaler, scaled_codes = Normalizer()(normalizer, data=train['codes'].T)
        train['codes'] = scaled_codes.T

    # sorting dataset
    sorted_codes, sorted_labels = self.sort_dataset(
        train['codes'], train['labels'])
    train['codes'] = sorted_codes
    train['labels'] = sorted_labels.astype(np.float64)

    # ---- testing dataset -------------------------------------------------
    with open(settings.TESTING_DATA_DIRECTORY_DATASET_PATH, 'r') as handle:
        self.testing_data = json.load(handle)
    self.to_numpy(self.testing_data)
    test = self.testing_data

    if sample_wise:
        # sample normalization
        test['codes'] = Normalizer()(normalizer, data=test['codes'].T).T
    else:
        # feature scaling with the scaler obtained from the training codes
        scaled_codes = Normalizer()(
            normalizer, data=test['codes'].T, fitted_scaler=scaler)[1]
        test['codes'] = scaled_codes.T

    # sorting dataset
    sorted_codes, sorted_labels = self.sort_dataset(
        test['codes'], test['labels'])
    test['codes'] = sorted_codes
    test['labels'] = sorted_labels.astype(np.float64)
def test_get_normalizer_standardize(self):
    """get_normalizer(STANDARDIZE) must return the same data as Normalizer.standardize."""
    direct = Normalizer.standardize(self.data)[1]
    dispatched = Normalizer.get_normalizer(
        Normalizer.STANDARDIZE, data=self.data)[1]
    self.assertTrue(np.array_equal(direct, dispatched))
def test_get_normalizer_max_norm(self):
    """get_normalizer(MAX_NORM) must return the same data as Normalizer.max_norm."""
    direct = Normalizer.max_norm(self.data)
    dispatched = Normalizer.get_normalizer(Normalizer.MAX_NORM, data=self.data)
    self.assertTrue(np.array_equal(direct, dispatched))
def test_get_normalizer_l2_norm(self):
    """get_normalizer(L2_NORM) must return the same data as Normalizer.l2_norm."""
    direct = Normalizer.l2_norm(self.data)
    dispatched = Normalizer.get_normalizer(Normalizer.L2_NORM, data=self.data)
    self.assertTrue(np.array_equal(direct, dispatched))
def test_get_normalizer_none(self):
    """get_normalizer(NONE) must return the same data as Normalizer.none."""
    direct = Normalizer.none(self.data)
    dispatched = Normalizer.get_normalizer(Normalizer.NONE, data=self.data)
    self.assertTrue(np.array_equal(direct, dispatched))
def test_get_normalizer_normalize(self):
    """get_normalizer(NORMALIZE) must return the same data as Normalizer.normalize."""
    direct = Normalizer.normalize(self.data)[1]
    dispatched = Normalizer.get_normalizer(
        Normalizer.NORMALIZE, data=self.data)[1]
    self.assertTrue(np.array_equal(direct, dispatched))
def test_normalize(self):
    """Normalizer.normalize data must equal MinMaxScaler().fit_transform of the data."""
    expected = MinMaxScaler().fit_transform(self.data)
    actual = Normalizer.normalize(self.data)[1]
    self.assertTrue(np.array_equal(expected, actual))
def test_standardize(self):
    """Normalizer.standardize data must equal StandardScaler().fit_transform of the data."""
    expected = StandardScaler().fit_transform(self.data)
    actual = Normalizer.standardize(self.data)[1]
    self.assertTrue(np.array_equal(expected, actual))
def test_max_norm(self):
    """Normalizer.max_norm must equal normalize(data, 'max')."""
    expected = normalize(self.data, 'max')
    actual = Normalizer.max_norm(self.data)
    self.assertTrue(np.array_equal(expected, actual))
def test_l2_norm(self):
    """Normalizer.l2_norm must equal normalize(data, 'l2')."""
    expected = normalize(self.data, 'l2')
    actual = Normalizer.l2_norm(self.data)
    self.assertTrue(np.array_equal(expected, actual))
def test_none(self):
    """Normalizer.none must return data equal to its input."""
    untouched = Normalizer.none(self.data)
    self.assertTrue(np.array_equal(self.data, untouched))
def test_functor(self):
    """Calling a Normalizer instance with L1_NORM must match Normalizer.l1_norm."""
    direct = Normalizer.l1_norm(self.data)
    via_call = Normalizer()(Normalizer.L1_NORM, data=self.data)
    self.assertTrue(np.array_equal(direct, via_call))