def cluster():
    """Run ``cl.find`` on a randomly generated 2-D integer sample.

    Draws ``2 ** 12`` values from ``normal(center, 50, ...)`` with
    ``center = 100``, converts each one with ``int()`` and pairs the values
    up into an array of ``2 ** 11`` rows by 2 columns.

    Returns:
        Whatever ``cl.find`` returns for the generated data and the fixed
        parameters below.
    """
    sample_number = 2 ** 12
    center = 100
    samples = [int(value) for value in normal(center, 50, sample_number)]
    # Floor division keeps the row count an integer on Python 3 as well;
    # ``reshape`` rejects a float dimension.
    data_2d = np.array(samples).reshape((sample_number // 2, 2))

    domain, desired_amount_of_points = (0, 100), 2000
    approximation, failure, eps, delta, promise = 0.1, 0.1, 0.5, 2 ** -20, 100
    # NOTE(review): the literal 2 is presumably the data dimension and the
    # trailing True an option flag of cl.find -- confirm against its signature.
    return cl.find(data_2d, 2, domain, desired_amount_of_points,
                   approximation, failure, eps, delta, promise, True)
def cluster():
    """Generate a random 2-D integer data set and pass it to ``cl.find``.

    ``2**12`` values are drawn with ``normal(center, 50, ...)`` where
    ``center`` is 100; each value is converted with ``int()`` and the flat
    sequence is reshaped into ``2**11`` rows of 2 columns.

    Returns:
        The result of the ``cl.find`` call, unchanged.
    """
    sample_number = 2**12
    center = 100
    drawn = normal(center, 50, sample_number)
    # ``//`` instead of ``/``: under Python 3 true division yields a float,
    # which ``reshape`` does not accept as a dimension.
    shape = (sample_number // 2, 2)
    data_2d = np.array([int(value) for value in drawn]).reshape(shape)

    domain, desired_amount_of_points = (0, 100), 2000
    approximation, failure, eps, delta, promise = 0.1, 0.1, 0.5, 2**-20, 100
    # NOTE(review): second argument (2) looks like the dimension of the data
    # and the final True like a boolean switch -- verify against cl.find.
    return cl.find(data_2d, 2, domain, desired_amount_of_points,
                   approximation, failure, eps, delta, promise, True)
# Benchmark: compare find_cluster() against eight timed runs of cluster.find().
# `blob`, `sample_number` and `dimension` are used here without being assigned,
# so they must already exist when this code runs.
#
# print(...) is always called with a single pre-formatted string so the output
# is identical under Python 2 (print statement) and Python 3 (print function).
approximation, failure, eps, delta = 0.1, 0.1, 0.5, 2**-10
domain_end = max(abs(np.min(blob)), np.max(blob))
domain = (domain_end, 0.01)
desired_amount_of_points = 1000

failure_bound = bound(sample_number, dimension, eps, delta, approximation)
print("The probability of failure is somewhere between %s\n" % str(failure_bound))

# Reference result from find_cluster(); only its radius is reported.
radius, center = find_cluster(blob, desired_amount_of_points)
print("Test-radius: %d" % radius)

for i in range(8):
    middle_time = time.time()
    try:
        radius, center = cluster.find(blob, dimension, domain,
                                      desired_amount_of_points,
                                      approximation, failure, eps, delta,
                                      shrink=False, use_histograms=False)
        # Points of the data set that lie inside the returned ball.
        ball = [p for p in blob if euclidean(p, center) <= radius]
        print("Good-radius: %d" % radius)
        print("Desired number of points in resulting ball: %d" % desired_amount_of_points)
        print("Number of points in the resulting ball: %d" % len(ball))
    except ValueError:
        # A ValueError anywhere in the attempt is reported as a failed run.
        ball = []
        print('_|_')
    end_time = time.time()
    print("Run-time: %.2f seconds\n" % (end_time - middle_time))
# Generate the sample: make_blobs() output, first element rounded to 2 decimals.
sample_number = 10000
dimension = 2
blobs = dss.make_blobs(sample_number, dimension, cluster_std=70)
blob = np.round(blobs[0], 2)

# Parameters handed to cluster.find().
approximation, failure = 0.1, 0.1
eps, delta = 0.5, 2**-10
# Largest of |minimum coordinate| and maximum coordinate over the whole sample.
domain_end = max(abs(np.min(blob)), np.max(blob))
domain = (domain_end, 0.01)
desired_amount_of_points = 500

# start_time / end_time bracket the cluster.find() call only.
start_time = time.time()
radius, center = cluster.find(
    blob, dimension, domain, desired_amount_of_points,
    approximation, failure, eps, delta,
    use_histograms=True)
end_time = time.time()

# Sample points whose Euclidean distance to the returned center is within radius.
ball = [p for p in blob if euclidean(p, center) <= radius]

fig = plt.figure()
# Subplot 211 with equal axis scaling.
ax = fig.add_subplot(211, aspect='equal')
# zip(*rows) regroups the points into one sequence per coordinate for scatter().
zipped_data = zip(*blob)
ax.scatter(*zipped_data, c='g')
zipped_ball = zip(*ball)
# Compare find_cluster() with cluster.find() on `data`, time both, and plot the
# two resulting circles. `clusters`, `data`, `domain_end` and
# `desired_amount_of_points` are used without being assigned here, so they must
# already exist when this code runs.
#
# print(...) is always called with a single pre-formatted string so the output
# is identical under Python 2 (print statement) and Python 3 (print function).
dimension, domain = 2, (domain_end, 0.1)
approximation, failure, eps, delta = 0.1, 0.1, 0.5, 2**-10

# Report find_cluster() for each individual cluster (rounded to 1 decimal).
for c in clusters:
    test_radius, test_center = find_cluster(np.round(c, 1), desired_amount_of_points)
    print("Radius: %d" % test_radius)
    print("Center: %s" % str(test_center))

# Timed run of find_cluster() on the full data set.
start_time = time.time()
test_radius, test_center = find_cluster(data, desired_amount_of_points)
print("Test-radius: %d" % test_radius)
print("Test-center: %s" % str(test_center))
middle_time = time.time()
print("Run-time: %.2f seconds\n" % (middle_time - start_time))

# Timed run of cluster.find() on the same data.
radius, center = cluster.find(data, dimension, domain, desired_amount_of_points,
                              approximation, failure, eps, delta)
print("Good-radius: %d" % radius)
print("Good-center: %s" % str(center))
end_time = time.time()
print("Run-time: %.2f seconds" % (end_time - middle_time))

fig = plt.figure()
ax = fig.add_subplot(111, aspect='equal')
ax.scatter(*zip(*data))
ax.annotate('center', xy=tuple(center), xytext=tuple(np.array(center) + 100),
            arrowprops=dict(facecolor='black', shrink=0.05))
# Angles for one full turn; 2*pi replaces the hand-typed 6.283 and yields the
# same 0.01-spaced grid.
phis = np.arange(0, 2 * np.pi, 0.01)
# Green: find_cluster() result; red: cluster.find() result.
ax.plot(*circle(test_radius, phis, test_center), c='g', ls='-')
ax.plot(*circle(radius, phis, center), c='r', ls='-')