def test_kcenters_6():
    """Check KCenters with a callable metric on non-ndarray input.

    A trajectory object is clustered with a user-supplied metric that only
    looks at the first coordinate of the first atom.  The resulting centers
    are compared against a second model fit with the built-in
    ``'sqeuclidean'`` metric on that same coordinate extracted as an array.
    """
    traj = md.Trajectory(xyz=np.random.randn(100, 1, 3), topology=None)

    def first_coord_sqdist(target, ref, i):
        # squared euclidean distance restricted to atom 0, coordinate 0,
        # measured from frame ``i`` of ``ref`` to every frame of ``target``
        delta = target.xyz[:, 0, 0] - ref.xyz[i, 0, 0]
        return delta ** 2

    custom_model = KCenters(
        n_clusters=10, metric=first_coord_sqdist, random_state=0)
    custom_model.fit([traj])

    builtin_model = KCenters(
        n_clusters=10, metric='sqeuclidean', random_state=0)
    builtin_model.fit([traj.xyz[:, :, 0]])

    # fold the custom model's centers together with ``+`` so that their
    # coordinates can be compared against the array-valued centers
    combined_centers = reduce(operator.add, custom_model.cluster_centers_)
    eq(combined_centers.xyz[:, 0, 0], builtin_model.cluster_centers_[:, 0])
def test_kcenters_2():
    """Check the centers and distances found for three tight point groups.

    Thirty points are supplied as three sequences of ten identical points
    each, located at (0, 0), (1, 1) and (0.5, 0.5).  With two clusters the
    test expects the centers at (0, 0) and (1, 1).
    """
    at_origin = np.zeros((10, 2))
    at_ones = np.ones((10, 2))
    at_middle = 0.5 * np.ones((10, 2))

    model = KCenters(n_clusters=2, random_state=0)
    model.fit([at_origin, at_ones, at_middle])

    # The centers should be [0,0] and [1,1], in either order.  This relies
    # on the random state seeding the initial center at either (0,0) or
    # (1,1); a different random state could have seeded the first cluster
    # at [0.5, 0.5].
    centers = model.cluster_centers_
    expected = np.array([[0, 0], [1, 1]])
    swapped = expected[::-1]
    assert np.all(centers == expected) or np.all(centers == swapped)

    # every point is either on a center (distance 0) or at the midpoint
    # between them (distance sqrt(2)/2)
    observed_distances = np.unique(np.concatenate(model.distances_))
    eq(observed_distances, np.array([0, np.sqrt(2) / 2]))
def test_kcenters_1():
    """Check container types and shapes of the fitted attributes.

    The model is fit on two random sequences of 23 and 10 two-dimensional
    points; ``labels_`` and ``distances_`` are expected to be lists with
    one 1-D entry per input sequence, and ``cluster_centers_`` to have one
    row per cluster.
    """
    model = KCenters(n_clusters=3)
    sequences = [np.random.randn(23, 2), np.random.randn(10, 2)]
    model.fit(sequences)

    assert isinstance(model.labels_, list)
    assert isinstance(model.distances_, list)
    assert len(model.labels_) == 2

    eq(model.cluster_centers_.shape, (3, 2))

    # labels first, then distances: one entry per frame of each sequence
    for per_sequence in (model.labels_, model.distances_):
        eq(per_sequence[0].shape, (23,))
        eq(per_sequence[1].shape, (10,))

    # fit_predict on a single fresh sequence returns one label per frame
    predicted = model.fit_predict([np.random.randn(10, 2)])
    eq(predicted[0].shape, (10,))