def test_random_lloyd_host_ptr(self):
    """Run k-means on a host-pointer tuple and reject malformed inputs.

    The samples are passed as ``(address, -1, shape)``, where the address
    comes from the array interface of ``self.samples``.
    """
    common = dict(init="random", device=0, verbosity=2, seed=3,
                  tolerance=0.05, yinyang_t=0)
    address = self.samples.__array_interface__["data"][0]
    hostptr = (address, -1, self.samples.shape)
    with self.stdout:
        centroids, assignments = kmeans_cuda(hostptr, 50, **common)
    self.assertEqual(self._get_iters_number(self.stdout), 7)
    self.assertEqual(centroids.shape, (50, 2))
    self.assertEqual(assignments.shape, (13000, ))
    self._validate(centroids, assignments, 0.05)
    # A string in place of the address must raise ValueError.
    with self.assertRaises(ValueError):
        kmeans_cuda(("bullshit", -1, self.samples.shape), 50, **common)
    # A bare string in place of the samples must raise TypeError.
    with self.assertRaises(TypeError):
        kmeans_cuda("bullshit", 50, **common)
def test_256_features(self):
    """Cluster 1000 row-normalised 256-column samples with the "cos" metric."""
    data = numpy.random.rand(1000, 256).astype(numpy.float32)
    row_lengths = numpy.linalg.norm(data, axis=1)
    # Scale every row to unit L2 norm in place.
    data /= row_lengths[:, None]
    with self.stdout:
        kmeans_cuda(data, 10, init="kmeans++", metric="cos", device=0,
                    verbosity=3, yinyang_t=0.1, seed=3)
    iterations = self._get_iters_number(self.stdout)
    self.assertEqual(iterations, 9)
def test_fp16_kmeanspp_validate(self):
    """Check that the first four fp16 centroids stay close to the fp32 ones."""
    options = dict(init="kmeans++", device=1, verbosity=2, seed=3,
                   tolerance=1.0, yinyang_t=0)
    centroids32, _ = kmeans_cuda(self.samples, 50, **options)
    half_samples = self.samples.astype(numpy.float16)
    centroids16, _ = kmeans_cuda(half_samples, 50, **options)
    # Largest absolute element-wise difference over the first four rows.
    delta = numpy.max(abs(centroids16[:4] - centroids32[:4]))
    self.assertLess(delta, 1.5e-4)
def test_afkmc2_big_k_lloyd(self):
    """Run 200 clusters with ``init=("afkmc2", 100)`` and check the iteration count."""
    init_spec = ("afkmc2", 100)
    with self.stdout:
        kmeans_cuda(self.samples, 200, init=init_spec, device=0,
                    verbosity=2, seed=3, tolerance=0.05, yinyang_t=0)
    iterations = self._get_iters_number(self.stdout)
    self.assertEqual(iterations, 4)
def test_import_lloyd(self):
    """Feed the centroids of a coarse run back in as ``init`` for a finer run."""
    shared = dict(device=1, verbosity=2, seed=3, yinyang_t=0)
    with self.stdout:
        # Stage 1: random init with a loose tolerance.
        centroids, assignments = kmeans_cuda(
            self.samples, 50, init="random", tolerance=0.25, **shared)
        # Stage 2: restart from the stage-1 centroids with a tighter tolerance.
        centroids, assignments = kmeans_cuda(
            self.samples, 50, init=centroids, tolerance=0.05, **shared)
    # one is 2nd stage init
    self.assertEqual(self._get_iters_number(self.stdout), 8)
    self._validate(centroids, assignments, 0.05)
def test_crap(self):
    """Check that invalid arguments raise the expected exception types."""
    verbose = dict(device=1, verbosity=2, seed=3, tolerance=0.05, yinyang_t=0)
    # Non-numeric cluster count.
    with self.assertRaises(TypeError):
        kmeans_cuda(self.samples, "bullshit", init="random", **verbose)
    # Unknown init method name.
    with self.assertRaises(ValueError):
        kmeans_cuda(self.samples, 50, init="bullshit", **verbose)
    # tolerance=100 is expected to be rejected.
    with self.assertRaises(ValueError):
        kmeans_cuda(self.samples, 50, init="random", device=1,
                    tolerance=100, yinyang_t=0)
    # yinyang_t=10 is expected to be rejected.
    with self.assertRaises(ValueError):
        kmeans_cuda(self.samples, 50, init="random", device=1, yinyang_t=10)
def test_fp16(self):
    """Compare fp16 ``knn_cuda`` neighbours against ``NearestNeighbors``."""
    half_samples = self.samples.astype(numpy.float16)
    clustering = kmeans_cuda(half_samples, 50, seed=777, verbosity=1)
    nb = knn_cuda(10, half_samples, *clustering, verbosity=2, device=1)
    reference = NearestNeighbors(n_neighbors=10).fit(half_samples)
    bn = reference.kneighbors()[1]
    # Number of neighbour slots where the two implementations disagree.
    mismatches = (nb != bn).sum()
    print("diff: %d" % mismatches)
    self.assertTrue(mismatches < 500)
def _test_large(self, k, dev):
    """Check ``knn_cuda`` ordering and completeness on 40000 random samples.

    The 800-cluster k-means result is cached as a pickle and reused when the
    cache file can be loaded. For every sample the returned neighbours must
    be sorted by distance (within 3e-7), and 100 randomly drawn other samples
    must not be closer than the farthest returned neighbour.

    :param k: number of neighbours requested from ``knn_cuda``.
    :param dev: value forwarded to ``knn_cuda`` as ``device``.
    """
    cache = "/tmp/kmcuda_knn_cache_large.pickle"
    samples = numpy.random.rand(40000, 48).astype(numpy.float32)
    # Shift four blocks of 10000 rows apart from each other.
    samples[:10000] += 1.0
    samples[10000:20000] -= 1.0
    samples[20000:30000, 0] += 2.0
    samples[30000:, 0] -= 2.0
    # NOTE(review): the samples are drawn from numpy.random on every call,
    # while the cache is not keyed on them; presumably the RNG is seeded
    # identically elsewhere - TODO confirm.
    try:
        # NOTE(security): unpickling can execute arbitrary code, and the
        # cache sits at a fixed path in the shared /tmp directory.
        with open(cache, "rb") as fin:
            ca = pickle.load(fin)
    except Exception:
        # Best-effort cache: any load failure falls back to recomputing.
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        ca = kmeans_cuda(samples, 800, seed=777, verbosity=1)
        with open(cache, "wb") as fout:
            pickle.dump(ca, fout, protocol=-1)
    # ``x != x`` is true only for NaN: print rows whose first coordinate is NaN.
    print("nan: %s" % numpy.nonzero(ca[0][:, 0] != ca[0][:, 0])[0])
    nb = knn_cuda(k, samples, *ca, verbosity=2, device=dev)
    print("checking...")
    for i, sn in enumerate(nb):
        # Consecutive neighbours must be in non-decreasing distance order.
        for j in range(len(sn) - 1):
            self.assertLessEqual(
                numpy.linalg.norm(samples[i] - samples[sn[j]]) -
                numpy.linalg.norm(samples[i] - samples[sn[j + 1]]),
                .0000003)
        # Distance to the farthest returned neighbour.
        mdist = numpy.linalg.norm(samples[i] - samples[sn[-1]])
        sn = set(sn)
        for r in numpy.random.randint(0, high=len(samples), size=100):
            if r not in sn:
                if i == r:
                    continue
                try:
                    self.assertLessEqual(
                        mdist, numpy.linalg.norm(samples[i] - samples[r]))
                except AssertionError as e:
                    # Report the offending pair before re-raising.
                    print(i, r)
                    raise e from None
def test_kmeanspp_yinyang(self):
    """Run kmeans++ init with ``yinyang_t=0.1`` and validate the result."""
    tolerance = 0.01
    with self.stdout:
        centroids, assignments = kmeans_cuda(
            self.samples, 50, init="kmeans++", device=1, verbosity=2,
            seed=3, tolerance=tolerance, yinyang_t=0.1)
    expected_iterations = 15 + 3
    self.assertEqual(self._get_iters_number(self.stdout), expected_iterations)
    self._validate(centroids, assignments, tolerance)
def test_afkmc2_lloyd_2gpus(self):
    """Run "afkmc2" init with ``yinyang_t=0`` and validate the result."""
    options = dict(init="afkmc2", device=0, verbosity=2, seed=3,
                   tolerance=0.05, yinyang_t=0)
    with self.stdout:
        centroids, assignments = kmeans_cuda(self.samples, 50, **options)
    self.assertEqual(self._get_iters_number(self.stdout), 4)
    self._validate(centroids, assignments, 0.05)
def _inertia_1d(values, centers):
    """Return the sum of squared distances from each value to its nearest center.

    Both arguments are flattened to 1-D; ``centers`` must be non-empty.
    """
    values = np.asarray(values, dtype=np.float64).ravel()
    centers = np.sort(np.asarray(centers, dtype=np.float64).ravel())
    # In sorted 1-D data the nearest center is one of the two that bracket
    # the insertion position.
    pos = np.searchsorted(centers, values)
    last = len(centers) - 1
    left = centers[np.clip(pos - 1, 0, last)]
    right = centers[np.clip(pos, 0, last)]
    nearest = np.minimum(np.abs(values - left), np.abs(values - right))
    return float(np.sum(nearest ** 2))


def cluster(bins, flattened_val, val, inertia=None):
    """Quantize ``flattened_val`` against a k-means codebook of ``bins`` entries.

    Uses ``kmeans_cuda`` when ``USE_KMEANS_CUDA`` and ``kmeans_cuda`` are both
    truthy, otherwise sklearn's ``KMeans``.

    :param bins: requested number of codebook entries.
    :param flattened_val: values to cluster; reshaped to a single column.
    :param val: original array; supplies the dtype and shape of the result.
    :param inertia: optional list; the clustering inertia is appended to it.
    :return: ``(compressed_val, codes, codebook)``.
    """
    samples = flattened_val.reshape((-1, 1))
    kmeans = None
    if USE_KMEANS_CUDA and kmeans_cuda:
        invalids = None
        int_bins = bins
        # Re-run with extra clusters until enough valid centroids remain.
        while invalids is None or int_bins - invalids < bins:
            if invalids:
                int_bins = bins + invalids
            codebook, _ = kmeans_cuda(samples, int_bins, device=1)
            # Centroids have a single column, so a row is invalid exactly
            # when its value is NaN, -inf or +inf.
            finite_rows = np.isfinite(codebook).all(axis=1)
            invalids = np.count_nonzero(~finite_rows)
            codebook = codebook[finite_rows]
    else:
        kmeans = KMeans(n_clusters=bins)
        kmeans.fit(samples)
        codebook = kmeans.cluster_centers_
    codebook = codebook.astype(val.dtype).flatten()
    compressed_val, codes = ConstantStore.codes_and_compressed(
        flattened_val, codebook, val.shape)
    if inertia is not None:
        if kmeans is not None:
            inertia.append(kmeans.inertia_)
        else:
            # The CUDA path has no fitted estimator (the original raised
            # NameError here), so derive the inertia from the codebook.
            inertia.append(_inertia_1d(flattened_val, codebook))
    return compressed_val, codes, codebook
def test_random_lloyd_same_device_ptr_all_devs(self):
    """Run k-means on a device-pointer tuple and read the results back.

    The samples are copied into a device allocation, clustered through the
    ``(pointer, 0, shape)`` form, and the results are copied back to the host
    for validation. The sample buffer must come back unchanged.
    """
    cuda = CUDA()
    devptr = cuda.api.allocate(self.samples.size * 4, 0)
    cuda.api.copy_to_device(devptr, self.samples)
    options = dict(init="random", device=0, verbosity=2, seed=3,
                   tolerance=0.05, yinyang_t=0)
    with self.stdout:
        cdevptr, adevptr = kmeans_cuda(
            (devptr, 0, self.samples.shape), 50, **options)
    for returned_ptr in (cdevptr, adevptr):
        cuda.api.wrap(returned_ptr, 0)
    try:
        self.assertEqual(self._get_iters_number(self.stdout), 7)
        self.assertIsInstance(cdevptr, int)
        self.assertIsInstance(adevptr, int)
        flat_centroids = cuda.api.copy_to_host(cdevptr, 100, numpy.float32)
        centroids = flat_centroids.reshape((50, 2))
        assignments = cuda.api.copy_to_host(adevptr, 13000, numpy.uint32)
        self._validate(centroids, assignments, 0.05)
        new_samples = cuda.api.copy_to_host(
            devptr, self.samples.size, numpy.float32)
    finally:
        # Free all three device buffers whether or not the checks passed.
        for ptr in (devptr, cdevptr, adevptr):
            cuda.api.free(ptr)
    unchanged = self.samples.ravel() == new_samples.ravel()
    self.assertTrue(unchanged.all())
def test_hostptr(self):
    """Call ``knn_cuda`` with host-pointer tuples and reject malformed ones."""
    cents, asses = kmeans_cuda(self.samples, 50, seed=777, verbosity=1)
    samples_ptr = self.samples.__array_interface__["data"][0]
    centroids_ptr = cents.__array_interface__["data"][0]
    asses_ptr = asses.__array_interface__["data"][0]
    samples_arg = (samples_ptr, -1, self.samples.shape)
    centroids_arg = (centroids_ptr, len(cents))
    nb = knn_cuda(10, samples_arg, centroids_arg, asses_ptr, verbosity=2)
    reference = NearestNeighbors(n_neighbors=10).fit(self.samples)
    bn = reference.kneighbors()[1]
    print("diff: %d" % (nb != bn).sum())
    self.assertTrue((nb == bn).all())
    with self.assertRaises(ValueError):
        knn_cuda(10, ("bullshit", -1, self.samples.shape),
                 centroids_arg, asses_ptr, verbosity=2)
    with self.assertRaises(TypeError):
        knn_cuda(10, "bullshit", centroids_arg, asses_ptr, verbosity=2)
    # NOTE(review): the first element below is the string literal
    # "samples_ptr", not the variable - possibly unintended; confirm which
    # argument is supposed to trigger the ValueError in these three cases.
    literal_samples = ("samples_ptr", -1, self.samples.shape)
    with self.assertRaises(ValueError):
        knn_cuda(10, literal_samples, ("bullshit", len(cents)),
                 asses_ptr, verbosity=2)
    with self.assertRaises(ValueError):
        knn_cuda(10, literal_samples, "bullshit", asses_ptr, verbosity=2)
    with self.assertRaises(ValueError):
        knn_cuda(10, literal_samples, centroids_arg, "bullshit", verbosity=2)
def k_means_vector_gpu_fp32(weight_vector, n_clusters, verbosity=0,
                            seed=int(time.time()), gpu_id=7):
    """Replace every row of ``weight_vector`` by the centroid of its cluster.

    NOTE: the default ``seed`` is evaluated once, when the function is
    defined, not on every call.

    :param weight_vector: 2-D array whose rows are the samples.
    :param n_clusters: number of clusters.
    :param verbosity: forwarded to ``kmeans_cuda``.
    :param seed: seed for the k-means++ initialisation and ``kmeans_cuda``.
    :param gpu_id: forwarded to ``kmeans_cuda`` as ``device``.
    :return: array with the same shape as ``weight_vector``. The input itself
        is returned when it has a single column or exactly ``n_clusters`` rows.
    """
    n_samples, n_features = weight_vector.shape
    if n_clusters == 1:
        # One cluster: every row becomes the mean row. The mean must be taken
        # over the samples (axis 0); averaging over axis 1 tiled the per-row
        # means into an (n_samples, n_samples) array.
        mean_sample = np.mean(weight_vector, axis=0)
        return np.tile(mean_sample, (n_samples, 1))
    if n_features == 1 or n_samples == n_clusters:
        return weight_vector
    init_centers = sklearn.cluster.k_means_._k_init(
        X=weight_vector, n_clusters=n_clusters,
        x_squared_norms=row_norms(weight_vector, squared=True),
        random_state=RandomState(seed))
    centers, labels = kmeans_cuda(
        samples=weight_vector, clusters=n_clusters, init=init_centers,
        yinyang_t=0, seed=seed, device=gpu_id, verbosity=verbosity)
    # Gather the centroid of every row in a single indexing operation.
    return centers[labels].astype(np.float32, copy=False)
def __init__(self, X, n_clusters, n_init=3, method="kmcuda"):
    """Cluster ``X`` and record centers, labels and one sample index per cluster.

    :param X: samples, one per row.
    :param n_clusters: number of clusters.
    :param n_init: number of runs; for "kmcuda" the run with the lowest
        inertia is kept.
    :param method: "kmcuda" or "sklearn".
    :raises NotImplementedError: for any other ``method``.
    """
    if method == "kmcuda":
        self.inertia = np.inf
        samples = X.astype(np.float32)
        for _ in range(n_init):
            centers, y_pred = kmeans_cuda(samples, n_clusters)
            centroids_idxs = []
            inertia = 0
            for i in range(n_clusters):
                # Indices of the samples assigned to cluster i.
                idx = np.flatnonzero(y_pred == i)
                if len(idx) != 0:
                    norm = la.norm(X[idx] - centers[i], axis=1)
                    # The member closest to the center represents the cluster.
                    centroids_idxs.append(idx[norm.argmin()])
                    inertia += np.sum(norm)
                else:
                    # Empty cluster: fall back to sample 0.
                    centroids_idxs.append(0)
            centroids_idxs = np.array(centroids_idxs)
            if inertia < self.inertia:
                # Record the new best inertia. Without this update every
                # finite run compared against inf and the last one won.
                self.inertia = inertia
                self.centers = centers
                self.y_pred = y_pred
                self.centroids_idxs = centroids_idxs
    elif method == "sklearn":
        km = KMeans(n_clusters, n_init=n_init)
        self.y_pred = km.fit_predict(X)
        self.centers = km.cluster_centers_
        self.centroids_idxs = km.transform(X).argmin(axis=0)
    else:
        raise NotImplementedError
def main(argv):
    """Cluster the gallery embeddings and log NMI and the Recall-K evaluation.

    :param argv: command-line arguments handed to ``parser.parse_args``.
    """
    # Verify that parameters are set correctly.
    args = parser.parse_args(argv)

    gallery_pids, gallery_fids = common.load_dataset(args.gallery_dataset, None)

    # NOTE(review): ``exp_root`` is not defined in this function; presumably
    # a module-level name - confirm.
    log_file = os.path.join(exp_root, "recall_eval")
    logging.config.dictConfig(common.get_logging_dict(log_file))
    log = logging.getLogger('recall_eval')

    with h5py.File(args.gallery_embeddings, 'r') as f_gallery:
        gallery_embs = np.array(f_gallery['emb'])

    # One cluster per distinct gallery pid.
    num_clusters = len(np.unique(gallery_pids))
    print('Start clustering K ={}'.format(num_clusters))

    centroids, assignments = kmeans_cuda(gallery_embs, num_clusters, seed=3)
    nmi = normalized_mutual_info_score(gallery_pids, assignments)
    log.info(exp_root)
    log.info('NMI :: {}'.format(nmi))
    log.info('Clustering complete')

    log.info('Eval with Recall-K')
    names, accs = evaluate_emb(gallery_embs, gallery_pids)
    log.info(names)
    log.info(accs)
def test_fp16_cosine_metric(self):
    """Cluster fp16 unit-circle points into four groups with the "cos" metric."""
    arr = numpy.empty((10000, 2), dtype=numpy.float16)
    angs = numpy.random.rand(10000) * 2 * numpy.pi
    # Place every point on the unit circle at its random angle.
    for row, ang in enumerate(angs):
        arr[row] = numpy.sin(ang), numpy.cos(ang)
    with self.stdout:
        centroids, assignments = kmeans_cuda(
            arr, 4, init="kmeans++", metric="cos", device=1, verbosity=2,
            seed=3)
    self.assertEqual(self._get_iters_number(self.stdout), 5)
    self.assertEqual(len(centroids), 4)
    for centroid in centroids:
        length = numpy.linalg.norm(centroid)
        self.assertTrue(0.9995 < length < 1.0005)
    # Rounded pairwise cosine distances between the centroids.
    dists = numpy.round(cosine_distances(centroids)).astype(int)
    expected = [
        [0, 2, 1, 1],
        [2, 0, 1, 1],
        [1, 1, 0, 2],
        [1, 1, 2, 0],
    ]
    self.assertTrue((dists == expected).all())
    self.assertEqual(numpy.min(assignments), 0)
    self.assertEqual(numpy.max(assignments), 3)
def _test_device_ptr(self, dev):
    """Run ``knn_cuda`` on device pointers and compare with ``NearestNeighbors``.

    :param dev: value forwarded to ``knn_cuda`` as ``device``.
    """
    cuda = CUDA()
    shape = self.samples.shape
    sdevptr = cuda.api.allocate(self.samples.size * 4, 0)
    cuda.api.copy_to_device(sdevptr, self.samples)
    cdevptr, adevptr = kmeans_cuda(
        (sdevptr, 0, shape), 50, init="random", device=0, verbosity=2,
        seed=3, tolerance=0.05, yinyang_t=0)
    cuda.api.wrap(cdevptr, 0)
    cuda.api.wrap(adevptr, 0)
    ndevptr = knn_cuda(10, (sdevptr, 0, shape), (cdevptr, 50), adevptr,
                       device=dev, verbosity=2)
    cuda.api.wrap(ndevptr, 0)
    try:
        flat = cuda.api.copy_to_host(ndevptr, shape[0] * 10, numpy.uint32)
        nb = flat.reshape((shape[0], 10))
        reference = NearestNeighbors(n_neighbors=10).fit(self.samples)
        bn = reference.kneighbors()[1]
        self.assertEqual((nb != bn).sum(), 0)
    finally:
        # Free every device buffer whether or not the comparison passed.
        for ptr in (sdevptr, cdevptr, adevptr, ndevptr):
            cuda.api.free(ptr)
def basis_cluster(weight, num_basis, num_clusters, cuda=False):
    """Split ``weight`` into ``num_basis`` column chunks and cluster each one.

    Params:
        - weight: weight matrix to do basis clustering on
        - num_basis: number of chunks, also the dimension of the coordinates
        - num_clusters: number of clusters per chunk
        - cuda: use ``libKMCUDA.kmeans_cuda`` instead of sklearn's ``KMeans``

    Return:
        - basis: (Nb, Nc, E/Nb) the cluster centers for each chunk.
        - coordinates: (V, Nb) the cluster index of each row for each chunk.
    """
    chunks = weight.chunk(num_basis, dim=1)
    if cuda:
        from libKMCUDA import kmeans_cuda
        estimator = None
    else:
        from sklearn.cluster import KMeans
        estimator = KMeans(init='k-means++', n_clusters=num_clusters, n_init=10)

    centers_per_chunk = []
    labels_per_chunk = []
    for chunk in chunks:
        features = chunk.numpy()
        if cuda:
            centroid, coordinate = kmeans_cuda(features, num_clusters, seed=7)
            # some clusters may have zero elements, thus the centroids
            # become [nan] in libKMCUDA
            centroid = np.nan_to_num(centroid)
        else:
            estimator.fit(features)
            centroid = estimator.cluster_centers_
            coordinate = estimator.labels_
        centers_per_chunk.append(torch.from_numpy(centroid.astype('float')))
        labels_per_chunk.append(torch.from_numpy(coordinate.astype('int32')))

    basis = torch.stack(centers_per_chunk).float()  # Nb X Nc(clusters) X E/Nb
    coordinates = torch.stack(labels_per_chunk).t().long()  # V X Nb
    return basis, coordinates
def fit(self, raw_batch):
    """Fit packet clusters on ``raw_batch`` with kmcuda.

    kmcuda is used instead of sklearn's KMeans for performance reasons, see
    https://github.com/src-d/kmcuda#python-api

    :param raw_batch: table with the ``raw_packet1`` column and the columns
        named in ``self.raw_columns``.
    :return: None
    """
    # do not consider single-packet flows
    multi_packet = raw_batch[raw_batch.raw_packet1 != 0]
    # form matrix (n_packet x (packet_size, IAT))
    packet_features = multi_packet[self.raw_columns].values.reshape(-1, 2)
    # omit non_packet values
    packet_features = drop_nan_packets(packet_features)

    # Start from the stored centers when present, otherwise from the data.
    if self._cluster_centers is None:
        init_clusters = "k-means++"
    else:
        init_clusters = self._cluster_centers
    init_from_data = isinstance(init_clusters, str)
    logger.info(
        'fitting on {} packets, init clusters from data: {}'.format(
            packet_features.shape[0], init_from_data))

    packet_features = self.scaler.transform(packet_features)
    cluster_centers_, assignments = kmeans_cuda(
        samples=packet_features,
        clusters=self.n_clusters,
        tolerance=0.01,
        init=init_clusters,
        yinyang_t=0,
        metric="L2",
        average_distance=False,
        seed=1,
        device=0,
        verbosity=1)
    self._cluster_centers = cluster_centers_
    # Evaluate each packet against the center it was assigned to.
    self._evaluate(packet_features, cluster_centers_[assignments])
def test_cosine_metric2(self):
    """Check that "cos"-metric centroids of unit-norm samples have unit norm."""
    samples = numpy.random.random((16000, 4)).astype(numpy.float32)
    lengths = numpy.linalg.norm(samples, axis=1)
    # Scale every row to unit L2 norm in place.
    samples /= lengths[:, numpy.newaxis]
    centroids, assignments = kmeans_cuda(
        samples, 50, metric="cos", verbosity=2, seed=3)
    for centroid in centroids:
        length = numpy.linalg.norm(centroid)
        self.assertTrue(0.9999 < length < 1.0001)
def test_fp16_kmeanspp_lloyd(self):
    """Run kmeans++ init on fp16 samples with ``yinyang_t=0`` and validate."""
    half_samples = self.samples.astype(numpy.float16)
    options = dict(init="kmeans++", device=1, verbosity=2, seed=3,
                   tolerance=0.05, yinyang_t=0)
    with self.stdout:
        centroids, assignments = kmeans_cuda(half_samples, 50, **options)
    self.assertEqual(self._get_iters_number(self.stdout), 5)
    # Validation is done on an fp32 copy of the centroids.
    centroids = centroids.astype(numpy.float32)
    self._validate(centroids, assignments, 0.05)
def test_fp16_afkmc2_lloyd(self):
    """Run "afkmc2" init on fp16 samples with ``yinyang_t=0`` and validate."""
    half_samples = self.samples.astype(numpy.float16)
    options = dict(init="afkmc2", device=1, verbosity=2, seed=3,
                   tolerance=0.05, yinyang_t=0)
    with self.stdout:
        centroids, assignments = kmeans_cuda(half_samples, 50, **options)
    self.assertEqual(self._get_iters_number(self.stdout), 4)
    # Validation is done on an fp32 copy of the centroids.
    centroids = centroids.astype(numpy.float32)
    self._validate(centroids, assignments, 0.05)
def test_random_lloyd_all_gpus(self):
    """Run random init with ``device=0`` and check iterations, shapes, validity."""
    options = dict(init="random", device=0, verbosity=2, seed=3,
                   tolerance=0.05, yinyang_t=0)
    with self.stdout:
        centroids, assignments = kmeans_cuda(self.samples, 50, **options)
    self.assertEqual(self._get_iters_number(self.stdout), 7)
    self.assertEqual(centroids.shape, (50, 2))
    self.assertEqual(assignments.shape, (13000,))
    self._validate(centroids, assignments, 0.05)
def test_random_lloyd_all_explicit_gpus(self):
    """Expect ``ValueError`` when ``device=0xFFFF`` is passed to kmeans_cuda."""
    with self.assertRaises(ValueError):
        # The result is deliberately not unpacked: the names were unused, and
        # a tuple-unpacking failure also raises ValueError, which would let
        # this assertion pass for the wrong reason.
        kmeans_cuda(
            self.samples, 50, init="random", device=0xFFFF,
            verbosity=2, seed=3, tolerance=0.05, yinyang_t=0)
def kmeans_data(i_file, o_file, c_array):
    """Cluster the features in ``i_file`` once per cluster count in ``c_array``.

    For every count the de-duplicated, sorted centroids are saved under
    ``o_file`` + <number of remaining centroids>, and the saved ``.npy`` file
    is passed to ``rebm.rebuild_mnist``.

    :param i_file: path of the feature array to load with ``np.load``.
    :param o_file: prefix of the output file names.
    :param c_array: sequence of cluster counts.
    """
    feature = np.load(i_file).astype('float32')
    print(feature.shape)
    for n_clusters in c_array:
        centroids, assignments = kmeans_cuda(
            feature, n_clusters, init="random", yinyang_t=0, verbosity=1)
        center_feature = delete_same_rows(centroids)
        # Sort the matrix.
        center_feature, _ = sortd.custum_sort_matrix(center_feature, rule=True)
        out_path = o_file + str(center_feature.shape[0])
        np.save(out_path, center_feature)
        # Rebuild the data from the saved centroids.
        rebm.rebuild_mnist(out_path + ".npy")
def test_fp16_kmeanspp_yinyang(self):
    """Run kmeans++ init with ``yinyang_t=0.1`` on fp16 samples and validate."""
    half_samples = self.samples.astype(numpy.float16)
    options = dict(init="kmeans++", device=1, verbosity=2, seed=3,
                   tolerance=0.01, yinyang_t=0.1)
    with self.stdout:
        centroids, assignments = kmeans_cuda(half_samples, 50, **options)
    # fp16 precision increases the number of iterations
    expected_iterations = 19 + 5
    self.assertEqual(self._get_iters_number(self.stdout), expected_iterations)
    centroids = centroids.astype(numpy.float32)
    self._validate(centroids, assignments, 0.01)
def _test_average_distance(self, dev):
    """Check the distance returned for ``average_distance=True``.

    The reference is the mean Euclidean distance between every sample and its
    assigned centroid.

    :param dev: value forwarded to ``kmeans_cuda`` as ``device``.
    """
    centroids, assignments, distance = kmeans_cuda(
        self.samples, 50, init="kmeans++", device=dev, verbosity=2, seed=3,
        tolerance=0.05, yinyang_t=0, average_distance=True)
    total = sum(
        (numpy.linalg.norm(sample - centroids[ass])
         for sample, ass in zip(self.samples, assignments)),
        0.0)
    valid_dist = total / self.samples.shape[0]
    self.assertLess(numpy.abs(valid_dist - distance), 1e-6)
def test_fp16_kmeanspp_yinyang(self):
    """Run kmeans++ init with ``yinyang_t=0.1`` on fp16 samples and validate."""
    half_samples = self.samples.astype(numpy.float16)
    options = dict(init="kmeans++", device=1, verbosity=2, seed=3,
                   tolerance=0.01, yinyang_t=0.1)
    with self.stdout:
        centroids, assignments = kmeans_cuda(half_samples, 50, **options)
    # fp16 precision increases the number of iterations
    expected_iterations = 16 + 7
    self.assertEqual(self._get_iters_number(self.stdout), expected_iterations)
    centroids = centroids.astype(numpy.float32)
    # Validation uses a slightly looser bound than the run tolerance.
    self._validate(centroids, assignments, 0.0105)
def test_random_lloyd_host_ptr(self):
    """Run k-means on a host-pointer tuple and reject malformed inputs.

    The samples are passed as ``(address, -1, shape)``, where the address
    comes from the array interface of ``self.samples``.
    """
    common = dict(init="random", device=0, verbosity=2, seed=3,
                  tolerance=0.05, yinyang_t=0)
    shape = self.samples.shape
    hostptr = (self.samples.__array_interface__["data"][0], -1, shape)
    with self.stdout:
        centroids, assignments = kmeans_cuda(hostptr, 50, **common)
    self.assertEqual(self._get_iters_number(self.stdout), 7)
    self.assertEqual(centroids.shape, (50, 2))
    self.assertEqual(assignments.shape, (13000,))
    self._validate(centroids, assignments, 0.05)
    # A string in place of the address must raise ValueError.
    with self.assertRaises(ValueError):
        kmeans_cuda(("bullshit", -1, self.samples.shape), 50, **common)
    # A bare string in place of the samples must raise TypeError.
    with self.assertRaises(TypeError):
        kmeans_cuda("bullshit", 50, **common)
def test_cosine_metric(self):
    """Compare "angular" ``knn_cuda`` neighbours with ``NearestNeighbors``."""

    def angular(x, y):
        # Angle between x and y; the dot product is clamped to [-1, 1]
        # before arccos.
        return numpy.arccos(max(min(x.dot(y), 1), -1))

    samples = self.samples.copy()
    samples /= numpy.linalg.norm(samples, axis=1)[:, numpy.newaxis]
    ca = kmeans_cuda(samples, 50, seed=777, verbosity=1, metric="angular")
    nb = knn_cuda(40, samples, *ca, verbosity=2, device=1, metric="angular")
    reference = NearestNeighbors(n_neighbors=40, metric=angular).fit(samples)
    bn = reference.kneighbors()[1]
    mismatches = (nb != bn).sum()
    print("diff: %d" % mismatches)
    self.assertLessEqual(mismatches, 114918)
def fit(self, dscData):
    """Cluster ``dscData`` with ``kmcu.kmeans_cuda`` and store the results.

    Sets ``self.kmdata``, ``self.centroids_`` and ``self.labels_``. NaN
    centroid entries are reported and replaced by -2.

    :param dscData: samples handed to ``kmeans_cuda``.
    :return: ``self``.
    """
    options = dict(metric=self.metric, verbosity=self.verbosity)
    # Pass the seed only when one was configured.
    if self.seed is not None:
        options["seed"] = self.seed
    self.kmdata = kmcu.kmeans_cuda(dscData, self.n_clusters, **options)
    self.centroids_ = self.kmdata[0]
    nan_mask = np.isnan(self.kmdata[0])
    tmpNumNAN = nan_mask.sum()
    if tmpNumNAN > 0:
        print('\t!!!warning!!! found {} NAN in kmean-centroids'.format(
            tmpNumNAN))
        self.centroids_[np.isnan(self.centroids_)] = -2.
    self.labels_ = self.kmdata[1]
    return self
def fit(self, X):
    """Compute cluster centers with ``kmeans_cuda`` and install them.

    The centers are stored on ``self`` and on ``self.kmeans_obj``.

    :param X: samples; converted to float32 before clustering.
    :return: ``self.kmeans_obj``.
    """
    logging.info('Using GPU-accelerated K-Means...')
    result = kmeans_cuda(X.astype(np.float32), clusters=self.k,
                         seed=self.seed, init=self.init)
    centers = result[0].astype(np.float32)
    self.cluster_centers_ = centers
    self.kmeans_obj.cluster_centers_ = self.cluster_centers_
    check_params = getattr(self.kmeans_obj, '_check_params', None)
    if check_params is not None:
        check_params(np.zeros_like(X))  # properly initialize
    return self.kmeans_obj
def test_crap(self):
    """Check that invalid arguments raise the expected exception types."""
    cases = (
        # (expected exception, positional args, keyword args)
        (TypeError, (self.samples, "bullshit"),
         dict(init="random", device=1, verbosity=2, seed=3, tolerance=0.05,
              yinyang_t=0)),
        (ValueError, (self.samples, 50),
         dict(init="bullshit", device=1, verbosity=2, seed=3, tolerance=0.05,
              yinyang_t=0)),
        (ValueError, (self.samples, 50),
         dict(init="random", device=1, tolerance=100, yinyang_t=0)),
        (ValueError, (self.samples, 50),
         dict(init="random", device=1, yinyang_t=10)),
    )
    for expected, args, kwargs in cases:
        with self.assertRaises(expected):
            kmeans_cuda(*args, **kwargs)
def test_fp16_random_lloyd(self):
    """Run random init on fp16 samples; check dtype, refcounts and shapes."""
    samples = self.samples.astype(numpy.float16)
    options = dict(init="random", device=1, verbosity=2, seed=3,
                   tolerance=0.05, yinyang_t=0)
    with self.stdout:
        centroids, assignments = kmeans_cuda(samples, 50, **options)
    # fp16 input must produce fp16 centroids.
    self.assertEqual(centroids.dtype, numpy.float16)
    centroids = centroids.astype(numpy.float32)
    self.assertEqual(self._get_iters_number(self.stdout), 7)
    # No extra references may be held: keep exactly one local name per array.
    self.assertEqual(sys.getrefcount(centroids), 2)
    self.assertEqual(sys.getrefcount(assignments), 2)
    self.assertEqual(sys.getrefcount(self.samples), 2)
    self.assertEqual(centroids.shape, (50, 2))
    self.assertEqual(assignments.shape, (13000,))
    self._validate(centroids, assignments, 0.05)
def test_kmeanspp_lloyd_uint32_overflow(self):
    """Run k-means on a 167772160 x 8 array; skip on ``MemoryError``."""
    print("initializing samples...")
    total_rows = 167772160
    samples = numpy.empty((total_rows, 8), dtype=numpy.float32)
    # Four copies of the fixture side by side, repeated down the array.
    tile = numpy.hstack((self.samples,) * 4)
    step = self.samples.shape[0]
    for start in range(0, total_rows, step):
        # The last chunk may be shorter than the tile.
        rows = min(step, total_rows - start)
        samples[start:start + rows] = tile[:rows]
    print("running k-means...")
    try:
        with self.stdout:
            centroids, assignments = kmeans_cuda(
                samples, 50, init="kmeans++", device=0, verbosity=2,
                seed=3, tolerance=0.142, yinyang_t=0)
        self.assertEqual(self._get_iters_number(self.stdout), 2)
    except MemoryError:
        self.skipTest("Not enough GPU memory.")
def _test_device_ptr(self, dev):
    """Run ``knn_cuda`` on device pointers and compare with ``NearestNeighbors``.

    :param dev: value forwarded to ``knn_cuda`` as ``device``.
    """
    cuda = CUDA()
    n_samples = self.samples.shape[0]
    sdevptr = cuda.api.allocate(self.samples.size * 4, 0)
    cuda.api.copy_to_device(sdevptr, self.samples)
    samples_arg = (sdevptr, 0, self.samples.shape)
    cdevptr, adevptr = kmeans_cuda(
        samples_arg, 50, init="random", device=0, verbosity=2, seed=3,
        tolerance=0.05, yinyang_t=0)
    cuda.api.wrap(cdevptr, 0)
    cuda.api.wrap(adevptr, 0)
    ndevptr = knn_cuda(10, (sdevptr, 0, self.samples.shape), (cdevptr, 50),
                       adevptr, device=dev, verbosity=2)
    cuda.api.wrap(ndevptr, 0)
    try:
        neighbours = cuda.api.copy_to_host(
            ndevptr, n_samples * 10, numpy.uint32)
        nb = neighbours.reshape((n_samples, 10))
        bn = NearestNeighbors(n_neighbors=10).fit(self.samples).kneighbors()[1]
        self.assertEqual((nb != bn).sum(), 0)
    finally:
        # Free every device buffer whether or not the comparison passed.
        cuda.api.free(sdevptr)
        cuda.api.free(cdevptr)
        cuda.api.free(adevptr)
        cuda.api.free(ndevptr)
def test_fp16_cosine_metric(self):
    """Cluster fp16 unit-circle points into four groups with the "cos" metric."""
    n_points = 10000
    arr = numpy.empty((n_points, 2), dtype=numpy.float16)
    angs = numpy.random.rand(n_points) * 2 * numpy.pi
    # Place every point on the unit circle at its random angle.
    for index in range(n_points):
        angle = angs[index]
        arr[index] = numpy.sin(angle), numpy.cos(angle)
    with self.stdout:
        centroids, assignments = kmeans_cuda(
            arr, 4, init="kmeans++", metric="cos", device=1, verbosity=2,
            seed=3)
    self.assertEqual(self._get_iters_number(self.stdout), 5)
    self.assertEqual(len(centroids), 4)
    for centroid in centroids:
        length = numpy.linalg.norm(centroid)
        self.assertTrue(0.9995 < length < 1.0005)
    # Rounded pairwise cosine distances between the centroids.
    dists = numpy.round(cosine_distances(centroids)).astype(int)
    expected = [
        [0, 2, 1, 1],
        [2, 0, 1, 1],
        [1, 1, 0, 2],
        [1, 1, 2, 0],
    ]
    self.assertTrue((dists == expected).all())
    self.assertEqual(numpy.min(assignments), 0)
    self.assertEqual(numpy.max(assignments), 3)
def _test_small(self, k, dev, dmax=0):
    """Compare ``knn_cuda`` neighbours on the fixture with ``NearestNeighbors``.

    :param k: number of neighbours.
    :param dev: value forwarded to ``knn_cuda`` as ``device``.
    :param dmax: maximum number of differing neighbour slots tolerated.
    """
    clustering = kmeans_cuda(self.samples, 50, seed=777, verbosity=1)
    nb = knn_cuda(k, self.samples, *clustering, verbosity=2, device=dev)
    reference = NearestNeighbors(n_neighbors=k).fit(self.samples)
    bn = reference.kneighbors()[1]
    mismatches = (nb != bn).sum()
    print("diff: %d" % mismatches)
    self.assertTrue(mismatches <= dmax)
def test_random_lloyd_all_explicit_gpus(self):
    """Expect ``ValueError`` when ``device=0xFFFF`` is passed to kmeans_cuda."""
    with self.assertRaises(ValueError):
        # The result is deliberately not unpacked: the names were unused, and
        # a tuple-unpacking failure also raises ValueError, which would let
        # this assertion pass for the wrong reason.
        kmeans_cuda(
            self.samples, 50, init="random", device=0xFFFF,
            verbosity=2, seed=3, tolerance=0.05, yinyang_t=0)
def test_afkmc2_big_k_lloyd(self):
    """Run 200 clusters with ``init=("afkmc2", 100)`` and check the iteration count."""
    options = dict(init=("afkmc2", 100), device=0, verbosity=2, seed=3,
                   tolerance=0.05, yinyang_t=0)
    with self.stdout:
        kmeans_cuda(self.samples, 200, **options)
    self.assertEqual(self._get_iters_number(self.stdout), 4)