def test():
    """Compare multi-kernel vs single-kernel ClusteredFunction GraphCut values on one subset.

    Relies on module-level fixtures: num_samples, num_clusters, num_features,
    cluster_std_dev, num_set, num_internal_clusters.
    """
    points, cluster_ids, centers = make_blobs(
        n_samples=num_samples,
        centers=num_clusters,
        n_features=num_features,
        center_box=(0, 100),
        cluster_std=cluster_std_dev,
        return_centers=True,
        random_state=4,
    )
    data = list(map(tuple, points))
    print(data)
    # Reproducibly draw num_set point indices from cluster #1.
    random.seed(1)
    indices_in_cluster1 = [i for i, label in enumerate(cluster_ids) if label == 1]
    subset1 = random.sample(indices_in_cluster1, num_set)
    set1 = set(subset1[:-1])
    data_arr = np.array(data)

    print("Instantiating...")
    multi_obj = ClusteredFunction(
        n=num_samples,
        mode="multi",
        f_name="GraphCut",
        metric='euclidean',
        data=data_arr,
        num_clusters=num_internal_clusters,
        cluster_lab=cluster_ids.tolist(),
    )
    print("Instantiated")
    multi_value = multi_obj.evaluate(set1)

    print("Instantiating...")
    single_obj = ClusteredFunction(
        n=num_samples,
        mode="single",
        f_name="GraphCut",
        metric='euclidean',
        data=data_arr,
        num_clusters=num_internal_clusters,
        cluster_lab=cluster_ids.tolist(),
    )
    print("Instantiated")
    single_value = single_obj.evaluate(set1)

    print("Eval1: ", multi_value)
    print("Eval2: ", single_value)
def test():
    """Check memoized vs naive evaluation and marginal gain of a clustered DisparitySum.

    Prints evaluate / evaluateWithMemoization on a subset, then compares the
    naive gain f(S + e) - f(S) against marginalGain and
    marginalGainWithMemoization for one held-out element.

    Relies on module-level fixtures: num_samples, num_clusters, num_features,
    cluster_std_dev, num_set, num_random, num_internal_clusters.
    """
    points, cluster_ids, centers = make_blobs(
        n_samples=num_samples,
        centers=num_clusters,
        n_features=num_features,
        center_box=(0, 100),
        cluster_std=cluster_std_dev,
        return_centers=True,
        random_state=4,
    )
    data = list(map(tuple, points))
    print(data)
    # get num_set data points belonging to cluster#1
    random.seed(1)
    cluster1Indices = [index for index, val in enumerate(cluster_ids) if val == 1]
    subset1 = random.sample(cluster1Indices, num_set)
    set1 = set(subset1[:-1])
    dataArray = np.array(data)
    # Alternative objectives kept for quick manual switching:
    # obj = DisparitySumFunction(n=num_samples, mode="sparse", data=dataArray, metric=metric, num_neighbors=num_sparse_neighbors)
    # obj = FeatureBasedFunction(n=num_samples, features=data, numFeatures=num_features, sparse=False)
    # obj = GraphCutFunction(n=num_samples, mode="sparse", data=dataArray, metric=metric, num_neighbors=num_sparse_neighbors)
    print("Instantiating...")
    obj = ClusteredFunction(
        n=num_samples,
        mode="multi",
        f_name="DisparitySum",
        metric='euclidean',
        data=dataArray,
        num_clusters=num_internal_clusters,
    )
    print("Instantiated")
    groundSet = obj.getEffectiveGroundSet()
    # FIX: renamed from `eval`, which shadowed the builtin.
    ground_eval = obj.evaluate(groundSet)
    print(f"Eval on groundset = {ground_eval}")
    # FIX: random.sample() no longer accepts a set (TypeError since Python
    # 3.11); sample from a sorted list, which is also deterministic under the
    # seed set above.
    elems = random.sample(sorted(set1), num_random)
    subset = set(elems[:-1])
    elem = elems[-1]
    print(f"Setting memoization for {subset}")
    obj.setMemoization(subset)
    zeroEval = obj.evaluate(subset)
    print(f"evaluate(subset) = {zeroEval}")
    firstEval = obj.evaluateWithMemoization(subset)
    print(f"evaluateWithMemoization(subset) = {firstEval}")
    print(f"Element to be added is {elem}")
    subset.add(elem)
    secondEval = obj.evaluate(subset)
    print(f"evaluate(subset++) = {secondEval}")
    # Naive gain: difference between f(S + e) and the memoized f(S).
    naiveGain = secondEval - firstEval
    print("naiveGain:", naiveGain)
    subset.remove(elem)
    simpleGain = obj.marginalGain(subset, elem)
    print(f"gain(subset,elem) = {simpleGain}")
    fastGain = obj.marginalGainWithMemoization(subset, elem)
    print(f"gainFast(subset,elem) = {fastGain}")
def test():
    """Compare FacilityLocation(mode="clustered") with the ClusteredFunction wrapper.

    Builds a small blob dataset, forms one same-cluster subset (set1) and one
    cross-cluster subset (set2), then prints evaluate / marginalGain /
    memoized variants / greedy maximization for both objective constructions.
    """
    num_clusters = 3
    cluster_std_dev = 1
    num_samples = 8
    num_set = 3
    budget = 4
    points, cluster_ids, centers = make_blobs(
        n_samples=num_samples,
        centers=num_clusters,
        n_features=2,
        cluster_std=cluster_std_dev,
        center_box=(0, 100),
        return_centers=True,
        random_state=4,
    )
    data = list(map(tuple, points))
    xs = [x[0] for x in data]
    ys = [x[1] for x in data]
    # get num_set data points belonging to cluster#1
    random.seed(1)
    cluster1Indices = [index for index, val in enumerate(cluster_ids) if val == 1]
    subset1 = random.sample(cluster1Indices, num_set)
    subset1xs = [xs[x] for x in subset1]
    subset1ys = [ys[x] for x in subset1]
    # plt.scatter(xs, ys, s=25, color='black', label="Images")
    # plt.scatter(subset1xs, subset1ys, s=25, color='red', label="Subset1")
    # plt.show()
    set1 = set(subset1[:-1])
    # get num_set data points belonging to different clusters
    subset2 = []
    for i in range(num_set):
        # find the index of first point that belongs to cluster i
        diverse_index = cluster_ids.tolist().index(i)
        subset2.append(diverse_index)
    subset2xs = [xs[x] for x in subset2]
    subset2ys = [ys[x] for x in subset2]
    # plt.scatter(xs, ys, s=25, color='black', label="Images")
    # plt.scatter(subset2xs, subset2ys, s=25, color='red', label="Subset2")
    # plt.show()
    set2 = set(subset2[:-1])
    dataArray = np.array(data)

    from submodlib.functions.facilityLocation import FacilityLocationFunction
    # start = time.process_time()
    obj5 = FacilityLocationFunction(n=num_samples, data=dataArray, mode="clustered", metric="euclidean", num_clusters=num_clusters)
    # print(f"Time taken by instantiation = {time.process_time() - start}")
    print(f"Subset 1's FL value = {obj5.evaluate(set1)}")
    print(f"Subset 2's FL value = {obj5.evaluate(set2)}")
    print(f"Gain of adding another point ({subset1[-1]}) of same cluster to {set1} = {obj5.marginalGain(set1, subset1[-1])}")
    print(f"Gain of adding another point ({subset2[-1]}) of different cluster to {set1} = {obj5.marginalGain(set1, subset2[-1])}")
    obj5.setMemoization(set1)
    # FIX: this f-string literal was split across two physical lines in the
    # source (a newline is invalid inside an f-string); rejoined to match the
    # identical obj7 print below.
    print(f"Subset 1's Fast FL value = {obj5.evaluateWithMemoization(set1)}")
    print(f"Fast gain of adding another point ({subset1[-1]}) of same cluster to {set1} = {obj5.marginalGainWithMemoization(set1, subset1[-1])}")
    # start = time.process_time()
    greedyList = obj5.maximize(budget=budget, optimizer='NaiveGreedy', stopIfZeroGain=False, stopIfNegativeGain=False, verbose=False)
    print(f"Greedy vector: {greedyList}")
    # print(f"Time taken by maximization = {time.process_time() - start}")
    # greedyXs = [xs[x[0]] for x in greedyList]
    # greedyYs = [ys[x[0]] for x in greedyList]
    # plt.scatter(xs, ys, s=25, color='black', label="Images")
    # plt.scatter(greedyXs, greedyYs, s=25, color='blue', label="Greedy Set")

    from submodlib import ClusteredFunction
    obj7 = ClusteredFunction(n=num_samples, mode="multi", f_name='FacilityLocation', metric='euclidean', data=dataArray, num_clusters=num_clusters)
    # print(f"Time taken by instantiation = {time.process_time() - start}")
    print(f"Subset 1's FL value = {obj7.evaluate(set1)}")
    print(f"Subset 2's FL value = {obj7.evaluate(set2)}")
    print(f"Gain of adding another point ({subset1[-1]}) of same cluster to {set1} = {obj7.marginalGain(set1, subset1[-1])}")
    print(f"Gain of adding another point ({subset2[-1]}) of different cluster to {set1} = {obj7.marginalGain(set1, subset2[-1])}")
    obj7.setMemoization(set1)
    print(f"Subset 1's Fast FL value = {obj7.evaluateWithMemoization(set1)}")
    print(f"Fast gain of adding another point ({subset1[-1]}) of same cluster to {set1} = {obj7.marginalGainWithMemoization(set1, subset1[-1])}")
    # start = time.process_time()
    greedyList = obj7.maximize(budget=budget, optimizer='NaiveGreedy', stopIfZeroGain=False, stopIfNegativeGain=False, verbose=False)
    print(f"Greedy vector: {greedyList}")
[2,20,6, 2000], [12,20,68, 200] ]) '''
# NOTE(review): the line above is the tail of a triple-quoted (commented-out)
# alternative dataset whose opening ''' lies outside this chunk — confirm
# against the full file before editing this region.
# 43 points in 2-D: four 3x3 grids (around x≈5/x≈15, y≈13/y≈7) plus 7 extras.
data = np.array([(4.5, 13.5), (5, 13.5), (5.5, 13.5), (14.5, 13.5), (15, 13.5), (15.5, 13.5), (4.5, 13), (5, 13), (5.5, 13), (14.5, 13), (15, 13), (15.5, 13), (4.5, 12.5), (5, 12.5), (5.5, 12.5), (14.5, 12.5), (15, 12.5), (15.5, 12.5), (4.5, 7.5), (5, 7.5), (5.5, 7.5), (14.5, 7.5), (15, 7.5), (15.5, 7.5), (4.5, 7), (5, 7), (5.5, 7), (14.5, 7), (15, 7), (15.5, 7), (4.5, 6.5), (5, 6.5), (5.5, 6.5), (14.5, 6.5), (15, 6.5), (15.5, 6.5), (7.5, 10), (12.5, 10), (10, 12.5), (10, 7.5), (4.5, 15.5), (5, 9.5), (5, 10.5)])
#a, b, c = create_cluster(data.tolist(), 'euclidean', num_cluster = 2)
#print(a)
#print(c)
#print(b)
#obj = ClusteredFunction(43, 'FacilityLocation', a, b, c)
# Clustered FacilityLocation over all 43 points; clustering is left to the
# library's defaults (no mode/metric/labels passed).
obj = ClusteredFunction(n=43, f_name='FacilityLocation', data=data)
X = {1, 23, 4}
print(obj.evaluate(X))
X = {1, 23, 4}
print(obj.marginalGain(X, 0))
#print(obj.maximize('NaiveGreedy', 10, False, False, False))
# Positional args presumably map to (budget=10, optimizer='NaiveGreedy',
# stopIfZeroGain, stopIfNegativeGain, verbose), matching the keyword calls
# elsewhere in this file — TODO confirm against maximize()'s signature.
print(obj.maximize(10, 'NaiveGreedy', False, False, False))
def create_fl_clustered_user_single():
    """Construct a clustered FacilityLocation objective from caller-supplied cluster labels.

    NOTE(review): the name says "single" but mode="multi" is passed — confirm intent.
    Uses module-level fixtures: num_samples, dataArray, num_clusters, cluster_ids.
    """
    labels = cluster_ids.tolist()
    return ClusteredFunction(
        n=num_samples,
        mode="multi",
        f_name='FacilityLocation',
        data=dataArray,
        metric="euclidean",
        num_clusters=num_clusters,
        cluster_lab=labels,
    )
def create_fl_clustered_birch_single():
    """Construct a single-kernel clustered FacilityLocation objective.

    No cluster labels are passed, so clustering is presumably done internally
    (BIRCH, per the name) — TODO confirm against ClusteredFunction docs.
    Uses module-level fixtures: num_samples, dataArray, num_clusters.
    """
    return ClusteredFunction(
        n=num_samples,
        mode="single",
        f_name='FacilityLocation',
        data=dataArray,
        metric="euclidean",
        num_clusters=num_clusters,
    )
def fl_clustered_user_single():
    """Build a clustered FacilityLocation from user-supplied labels and run greedy maximization.

    Benchmark-style entry point; the maximization result is discarded.
    Uses module-level fixtures: num_samples, dataArray, num_clusters,
    cluster_ids, budget, optimizer.
    """
    fn = ClusteredFunction(
        n=num_samples,
        mode="multi",
        f_name='FacilityLocation',
        data=dataArray,
        metric="euclidean",
        num_clusters=num_clusters,
        cluster_lab=cluster_ids.tolist(),
    )
    fn.maximize(
        budget=budget,
        optimizer=optimizer,
        stopIfZeroGain=False,
        stopIfNegativeGain=False,
        verbose=False,
    )
def fl_clustered_birch_single():
    """Build a single-kernel clustered FacilityLocation (no labels) and run greedy maximization.

    Benchmark-style entry point; the maximization result is discarded.
    Uses module-level fixtures: num_samples, dataArray, num_clusters,
    budget, optimizer.
    """
    fn = ClusteredFunction(
        n=num_samples,
        mode="single",
        f_name='FacilityLocation',
        data=dataArray,
        metric="euclidean",
        num_clusters=num_clusters,
    )
    fn.maximize(
        budget=budget,
        optimizer=optimizer,
        stopIfZeroGain=False,
        stopIfNegativeGain=False,
        verbose=False,
    )