def test_precomputed_with_pca():
    """Expect a RuntimeWarning when `n_pca` is given with a precomputed matrix."""
    expected_warning = (
        "n_pca cannot be given on a precomputed graph. Setting n_pca=None"
    )
    with assert_warns_message(RuntimeWarning, expected_warning):
        # The distance matrix is built inside the context, as in the
        # original, so any warning it raises is observed the same way.
        distance_matrix = squareform(pdist(data))
        build_graph(distance_matrix, precomputed="distance", n_pca=20)
def test_build_landmark_with_too_many_landmarks():
    """Build a graph asking for as many landmarks as there are samples.

    NOTE(review): no assertion or expected-exception check is visible here;
    as written the test only passes if the call does not raise. Confirm
    whether an expected-error check is missing.
    """
    n_samples = len(data)
    build_graph(data, n_landmark=n_samples)
def test_verbose():
    """Build a verbose landmark graph and access its `landmark_op` attribute.

    NOTE(review): another `test_verbose` is defined later in this file; if
    both live in the same module the later definition replaces this one.
    """
    print()
    print("Verbose test: Landmark")
    graph = build_graph(data, decay=None, n_landmark=500, verbose=True)
    # Attribute access only; the value itself is not checked.
    graph.landmark_op
def test_balltree_cosine():
    """Expect a UserWarning when the cosine metric is requested."""
    expected_warning = (
        "Metric cosine not valid for `sklearn.neighbors.BallTree`. "
        "Graph instantiation may be slower than normal."
    )
    graph_kwargs = {
        "n_pca": 20,
        "decay": 10,
        "distance": "cosine",
        "thresh": 1e-4,
    }
    with assert_warns_message(UserWarning, expected_warning):
        build_graph(data, **graph_kwargs)
def test_bandwidth_no_decay():
    """Expect a UserWarning when `bandwidth` is given together with `decay=None`."""
    expected_warning = "`bandwidth` is not used when `decay=None`."
    with assert_warns_message(UserWarning, expected_warning):
        build_graph(
            data,
            n_pca=20,
            decay=None,
            bandwidth=3,
            thresh=1e-4,
        )
def test_duplicate_data():
    """Expect a RuntimeWarning listing samples when 10 rows are duplicated."""
    warning_pattern = (
        r"Detected zero distance between samples ([0-9and,\s]*). "
        r"Consider removing duplicates to avoid errors in downstream processing."
    )
    with assert_warns_regex(RuntimeWarning, warning_pattern):
        # Append copies of the first 10 rows so the input has exact duplicates.
        with_duplicates = np.vstack([data, data[:10]])
        build_graph(with_duplicates, n_pca=20, decay=10, thresh=1e-4)
def test_verbose():
    """Build a graph with `verbose=True` after printing a kNN banner.

    NOTE(review): an earlier `test_verbose` is defined in this file; if both
    live in the same module this definition replaces the earlier one.
    """
    print()
    print("Verbose test: kNN")
    graph_kwargs = {"decay": None, "verbose": True}
    build_graph(data, **graph_kwargs)
def test_badstring_rank_threshold():
    """Expect a ValueError for an unrecognised string `rank_threshold`."""
    expected_error = "rank_threshold must be positive float or 'auto'."
    with assert_raises_message(ValueError, expected_error):
        build_graph(data, n_pca=True, rank_threshold="foobar")
def test_negative_rank_threshold():
    """Expect a ValueError for a negative `rank_threshold`."""
    expected_error = "rank_threshold must be positive float or 'auto'."
    with assert_raises_message(ValueError, expected_error):
        build_graph(data, n_pca=True, rank_threshold=-1)
def test_uncastable_n_pca():
    """Expect a ValueError when `n_pca` is an empty list."""
    expected_error = (
        "n_pca was not an instance of numbers.Number, "
        "could not be cast to False, and not None. "
        "Please supply an integer "
        "0 <= n_pca < min(n_samples,n_features) or None"
    )
    with assert_raises_message(ValueError, expected_error):
        build_graph(data, n_pca=[])
def test_negative_n_pca():
    """Expect a ValueError when `n_pca` is negative."""
    expected_error = (
        "n_pca cannot be negative. "
        "Please supply an integer "
        "0 <= n_pca < min(n_samples,n_features) or None"
    )
    with assert_raises_message(ValueError, expected_error):
        build_graph(data, n_pca=-1)
def test_badstring_n_pca():
    """Expect a ValueError when `n_pca` is an unrecognised string."""
    expected_error = (
        "n_pca must be an integer "
        "0 <= n_pca < min(n_samples,n_features), "
        "or in [None, False, True, 'auto']."
    )
    with assert_raises_message(ValueError, expected_error):
        build_graph(data, n_pca="foobar")
def test_0_n_pca():
    """Check that `n_pca=0` and `n_pca=False` both leave `n_pca` as None."""
    for disabled_value in (0, False):
        graph = build_graph(data, n_pca=disabled_value)
        assert graph.n_pca is None
def test_pandas_dataframe():
    """Check that DataFrame input yields a BaseGraph whose data is an ndarray."""
    frame = pd.DataFrame(data)
    graph = build_graph(frame)
    assert isinstance(graph, graphtools.base.BaseGraph)
    assert isinstance(graph.data, np.ndarray)
def test_build_knn_with_precomputed():
    """Expect a ValueError when a kNN graph is requested with `precomputed`."""
    expected_error = (
        "kNNGraph does not support precomputed values. "
        "Use `graphtype='exact'` or `precomputed=None`"
    )
    with assert_raises_message(ValueError, expected_error):
        build_graph(
            data,
            n_pca=None,
            graphtype="knn",
            precomputed="distance",
        )
def test_sample_idx_and_precomputed():
    """Build a graph passing both `sample_idx` and `precomputed`.

    NOTE(review): no assertion or expected-exception check is visible here;
    as written the test only passes if the call does not raise. Confirm
    whether an expected-error check is missing.
    """
    batch_labels = np.arange(10)
    build_graph(
        data,
        n_pca=None,
        sample_idx=batch_labels,
        precomputed="distance",
    )
def test_build_knn_with_sample_idx():
    """Expect a ValueError when a kNN graph is requested with `sample_idx`."""
    expected_error = (
        "kNNGraph does not support batch correction. "
        "Use `graphtype='mnn'` or `sample_idx=None`"
    )
    with assert_raises_message(ValueError, expected_error):
        batch_labels = np.arange(len(data))
        build_graph(data, graphtype="knn", sample_idx=batch_labels)
def test_sample_idx_wrong_length():
    """Build an MNN graph with a 10-element `sample_idx`.

    NOTE(review): no assertion or expected-exception check is visible here;
    as written the test only passes if the call does not raise. Confirm
    whether an expected-error check is missing.
    """
    short_labels = np.arange(10)
    build_graph(data, graphtype="mnn", sample_idx=short_labels)
def test_duplicate_data_many():
    """Expect a RuntimeWarning counting pairs when 21 rows are duplicated."""
    warning_pattern = (
        "Detected zero distance between ([0-9]*) pairs of samples. "
        "Consider removing duplicates to avoid errors in downstream processing."
    )
    with assert_warns_regex(RuntimeWarning, warning_pattern):
        # Append copies of the first 21 rows so the input has exact duplicates.
        with_duplicates = np.vstack([data, data[:21]])
        build_graph(with_duplicates, n_pca=20, decay=10, thresh=1e-4)
def test_sample_idx_unique():
    """Build an MNNGraph whose `sample_idx` holds a single repeated value.

    NOTE(review): no assertion or expected-exception check is visible here;
    as written the test only passes if the call does not raise. Confirm
    whether an expected-error check is missing.
    """
    mnn_class = graphtools.graphs.MNNGraph
    constant_labels = np.ones(len(data))
    build_graph(data, graph_class=mnn_class, sample_idx=constant_labels)
def test_set_params():
    """Check `get_params` output and which parameters `set_params` accepts.

    The test verifies the reported parameters of a `decay=None` graph, then
    updates `n_jobs`, `random_state` and `verbose`, then expects a ValueError
    for each graph-defining parameter, and finally re-sets several parameters
    to their current values.
    """
    graph = build_graph(data, decay=None)

    expected_params = {
        "n_pca": 20,
        "random_state": 42,
        "kernel_symm": "+",
        "theta": None,
        "anisotropy": 0,
        "knn": 3,
        "knn_max": None,
        "decay": None,
        "bandwidth": None,
        "bandwidth_scale": 1,
        "distance": "euclidean",
        "thresh": 0,
        "n_jobs": -1,
        "verbose": 0,
    }
    assert graph.get_params() == expected_params, graph.get_params()

    # Parameters that can be changed on an existing graph.
    graph.set_params(n_jobs=4)
    assert graph.n_jobs == 4
    assert graph.knn_tree.n_jobs == 4
    graph.set_params(random_state=13)
    assert graph.random_state == 13
    graph.set_params(verbose=2)
    assert graph.verbose == 2
    graph.set_params(verbose=0)

    # Parameters whose update is expected to be rejected, in the original
    # order of the checks.
    rejected_updates = (
        ("knn", 15),
        ("knn_max", 15),
        ("decay", 10),
        ("distance", "manhattan"),
        ("thresh", 1e-3),
        ("theta", 0.99),
        ("kernel_symm", "*"),
        ("anisotropy", 0.7),
        ("bandwidth", 5),
        ("bandwidth_scale", 5),
    )
    for param_name, new_value in rejected_updates:
        expected_error = "Cannot update {}. Please create a new graph".format(
            param_name
        )
        with assert_raises_message(ValueError, expected_error):
            graph.set_params(**{param_name: new_value})

    # Re-setting parameters to their current values goes through the same
    # call with no error check around it.
    unchanged_values = {
        "knn": graph.knn,
        "decay": graph.decay,
        "thresh": graph.thresh,
        "distance": graph.distance,
        "theta": graph.theta,
        "anisotropy": graph.anisotropy,
        "kernel_symm": graph.kernel_symm,
    }
    graph.set_params(**unchanged_values)
def test_sample_idx_none():
    """Build an MNN graph with `sample_idx=None`.

    NOTE(review): no assertion or expected-exception check is visible here;
    as written the test only passes if the call does not raise. Confirm
    whether an expected-error check is missing.
    """
    graph_kwargs = {"graphtype": "mnn", "sample_idx": None}
    build_graph(data, **graph_kwargs)
def test_knnmax_too_large():
    """Expect a UserWarning when `knn_max` (9) is smaller than `knn` (10)."""
    expected_warning = (
        "Cannot set knn_max (9) to be less than knn (10). Setting knn_max=10"
    )
    graph_kwargs = {
        "n_pca": 20,
        "decay": 10,
        "knn": 10,
        "knn_max": 9,
        "thresh": 1e-4,
    }
    with assert_warns_message(UserWarning, expected_warning):
        build_graph(data, **graph_kwargs)
def test_build_mnn_with_precomputed():
    """Build an MNN graph while also passing `precomputed="distance"`.

    NOTE(review): no assertion or expected-exception check is visible here;
    as written the test only passes if the call does not raise. Confirm
    whether an expected-error check is missing.
    """
    graph_kwargs = {
        "n_pca": None,
        "graphtype": "mnn",
        "precomputed": "distance",
    }
    build_graph(data, **graph_kwargs)
def test_knn_no_knn_no_bandwidth():
    """Expect a ValueError when both `knn` and `bandwidth` are None."""
    expected_error = "Either `knn` or `bandwidth` must be provided."
    with assert_raises_message(ValueError, expected_error):
        build_graph(
            data,
            graphtype="knn",
            knn=None,
            bandwidth=None,
            thresh=1e-4,
        )
def test_knn_graph_invalid_symm():
    """Expect a ValueError for an unrecognised `kernel_symm` value."""
    expected_error = (
        "kernel_symm 'invalid' not recognized. "
        "Choose from '+', '*', 'mnn', or 'none'."
    )
    graph_kwargs = {
        "graphtype": "knn",
        "knn": 5,
        "thresh": 1e-4,
        "kernel_symm": "invalid",
    }
    with assert_raises_message(ValueError, expected_error):
        build_graph(data, **graph_kwargs)
def test_build_landmark_with_too_few_points():
    """Build a landmark graph on 50 samples with `n_svd=100`.

    NOTE(review): no assertion or expected-warning check is visible here, and
    a function with the same name is defined again later in this file; if
    both live in the same module the later definition replaces this one.
    """
    first_fifty = data[:50]
    build_graph(first_fifty, n_landmark=25, n_svd=100)
def test_build_knn_with_exact_alpha():
    """Expect a ValueError for a kNN graph built with `decay=10, thresh=0`.

    NOTE(review): the expected message mentions `decay=None` while the call
    passes `decay=10` -- confirm this matches the library's wording.
    """
    expected_error = (
        "Cannot instantiate a kNNGraph with `decay=None`, `thresh=0` "
        "and `knn_max=None`. Use a TraditionalGraph instead."
    )
    with assert_raises_message(ValueError, expected_error):
        build_graph(data, graphtype="knn", decay=10, thresh=0)
def test_build_landmark_with_too_few_points():
    """Expect a RuntimeWarning when `n_svd` (100) is >= the sample count (50).

    NOTE(review): an earlier function with the same name is defined in this
    file; if both live in the same module this definition replaces it.
    """
    expected_warning = (
        "n_svd (100) >= n_samples (50) Consider using kNNGraph or lower n_svd"
    )
    with assert_warns_message(RuntimeWarning, expected_warning):
        first_fifty = data[:50]
        build_graph(first_fifty, n_landmark=25, n_svd=100)
def test_fractional_n_pca():
    """Expect a RuntimeWarning when `n_pca` is the non-integer value 1.5."""
    expected_warning = (
        "Cannot perform PCA to fractional 1.5 dimensions. Rounding to 2"
    )
    with assert_warns_message(RuntimeWarning, expected_warning):
        build_graph(data, n_pca=1.5)