def test_correct_num_features(data):
    """Check that fitting selects exactly as many features as requested.

    For each requested count (2, 3 and 4) a fresh ``DGUFS`` instance is
    fitted on ``data`` and the length of its ``support`` attribute is
    compared with the request.
    """
    for requested in (2, 3, 4):
        selector = DGUFS(num_features=requested)
        selector.fit(data)
        n_selected = len(selector.support)
        assert n_selected == requested
def test_iris(iris):
    """Fit on the ``iris`` fixture asking for two features.

    Only the size of the resulting ``support`` is verified, not which
    features were picked.
    """
    n_wanted = 2
    selector = DGUFS(num_features=n_wanted)
    selector.fit(iris)
    assert len(selector.support) == n_wanted
def test_V(data):
    """Check that ``cluster_labels`` has shape (num_clusters, nrows).

    ``nrows`` is the first dimension of ``data``; ``num_clusters`` is read
    back from the fitted, default-configured ``DGUFS`` instance.
    """
    # Unpacking into two names also requires `data` to be two-dimensional.
    n_samples, _n_columns = np.shape(data)

    selector = DGUFS()
    selector.fit(data)

    expected_shape = (selector.num_clusters, n_samples)
    assert np.shape(selector.cluster_labels) == expected_shape
def test_L(data):
    """Test properties of the L matrix.

    Fits a default ``DGUFS`` model on ``data`` and checks that ``L`` is
    positive semi-definite, i.e. that none of its eigenvalues is negative
    beyond floating-point round-off.
    """
    dgufs = DGUFS()
    dgufs.fit(data)

    eigenvalues = np.linalg.eigvals(dgufs.L)

    # Eigenvalues that are mathematically zero routinely come out as tiny
    # negative numbers, so an exact ``>= 0`` comparison is flaky. Use a
    # tolerance scaled by the largest eigenvalue magnitude (at least 1.0;
    # ``initial`` also keeps this well-defined for an empty spectrum).
    scale = np.max(np.abs(eigenvalues), initial=1.0)
    tolerance = 1e-8 * scale

    # ``eigvals`` may return a complex array; only the real parts are
    # relevant for the sign check.
    assert np.all(np.real(eigenvalues) >= -tolerance)
def test_Y_norm(data):
    """Test norm(Y)_(2, 0) = m.

    The 2-norm of every row of ``Y`` is taken first; the 0-"norm" of that
    vector then counts the rows whose norm is non-zero. The count must
    equal the ``num_features`` of the fitted, default-configured model.
    """
    selector = DGUFS()
    selector.fit(data)

    row_norms = np.linalg.norm(selector.Y, ord=2, axis=1)
    nonzero_row_count = np.linalg.norm(row_norms, ord=0)

    assert nonzero_row_count == selector.num_features
def test_M(data):
    """Test M matrix is square, symmetric and two-valued.

    After fitting a default ``DGUFS`` model on ``data`` this checks that
    ``M`` has shape (nrows, nrows), equals its own transpose, and contains
    exactly two distinct values.
    """
    nrows, _ = np.shape(data)
    dgufs = DGUFS()
    dgufs.fit(data)

    assert np.shape(dgufs.M) == (nrows, nrows)
    # The symmetry this test advertises was previously never asserted.
    assert np.array_equal(dgufs.M, np.transpose(dgufs.M))
    # Exactly two distinct entries; the actual values are not constrained.
    assert len(np.unique(dgufs.M)) == 2
def test_error_num_features(data):
    """Expect a ``ValueError`` when ten features are requested on ``data``.

    Construction and fitting both happen inside the ``pytest.raises``
    block, so the error may come from either step.
    """
    too_many = 10
    with pytest.raises(ValueError):
        DGUFS(num_features=too_many).fit(data)