def test():
    """Compare multi-kernel vs single-kernel ClusteredFunction GraphCut values on one subset.

    Relies on module-level fixtures: num_samples, num_clusters, num_features,
    cluster_std_dev, num_set, num_internal_clusters.
    """
    points, cluster_ids, centers = make_blobs(
        n_samples=num_samples,
        centers=num_clusters,
        n_features=num_features,
        center_box=(0, 100),
        cluster_std=cluster_std_dev,
        return_centers=True,
        random_state=4,
    )
    data = list(map(tuple, points))
    print(data)
    # Reproducibly draw num_set point indices from cluster #1.
    random.seed(1)
    indices_in_cluster1 = [i for i, label in enumerate(cluster_ids) if label == 1]
    subset1 = random.sample(indices_in_cluster1, num_set)
    set1 = set(subset1[:-1])
    data_arr = np.array(data)

    print("Instantiating...")
    multi_obj = ClusteredFunction(
        n=num_samples,
        mode="multi",
        f_name="GraphCut",
        metric='euclidean',
        data=data_arr,
        num_clusters=num_internal_clusters,
        cluster_lab=cluster_ids.tolist(),
    )
    print("Instantiated")
    multi_value = multi_obj.evaluate(set1)

    print("Instantiating...")
    single_obj = ClusteredFunction(
        n=num_samples,
        mode="single",
        f_name="GraphCut",
        metric='euclidean',
        data=data_arr,
        num_clusters=num_internal_clusters,
        cluster_lab=cluster_ids.tolist(),
    )
    print("Instantiated")
    single_value = single_obj.evaluate(set1)

    print("Eval1: ", multi_value)
    print("Eval2: ", single_value)
def test():
    """Check memoized vs naive evaluation and marginal gain of a clustered DisparitySum.

    Prints evaluate / evaluateWithMemoization on a subset, then compares the
    naive gain f(S + e) - f(S) against marginalGain and
    marginalGainWithMemoization for one held-out element.

    Relies on module-level fixtures: num_samples, num_clusters, num_features,
    cluster_std_dev, num_set, num_random, num_internal_clusters.
    """
    points, cluster_ids, centers = make_blobs(
        n_samples=num_samples,
        centers=num_clusters,
        n_features=num_features,
        center_box=(0, 100),
        cluster_std=cluster_std_dev,
        return_centers=True,
        random_state=4,
    )
    data = list(map(tuple, points))
    print(data)
    # get num_set data points belonging to cluster#1
    random.seed(1)
    cluster1Indices = [index for index, val in enumerate(cluster_ids) if val == 1]
    subset1 = random.sample(cluster1Indices, num_set)
    set1 = set(subset1[:-1])
    dataArray = np.array(data)
    # Alternative objectives kept for quick manual switching:
    # obj = DisparitySumFunction(n=num_samples, mode="sparse", data=dataArray, metric=metric, num_neighbors=num_sparse_neighbors)
    # obj = FeatureBasedFunction(n=num_samples, features=data, numFeatures=num_features, sparse=False)
    # obj = GraphCutFunction(n=num_samples, mode="sparse", data=dataArray, metric=metric, num_neighbors=num_sparse_neighbors)
    print("Instantiating...")
    obj = ClusteredFunction(
        n=num_samples,
        mode="multi",
        f_name="DisparitySum",
        metric='euclidean',
        data=dataArray,
        num_clusters=num_internal_clusters,
    )
    print("Instantiated")
    groundSet = obj.getEffectiveGroundSet()
    # FIX: renamed from `eval`, which shadowed the builtin.
    ground_eval = obj.evaluate(groundSet)
    print(f"Eval on groundset = {ground_eval}")
    # FIX: random.sample() no longer accepts a set (TypeError since Python
    # 3.11); sample from a sorted list, which is also deterministic under the
    # seed set above.
    elems = random.sample(sorted(set1), num_random)
    subset = set(elems[:-1])
    elem = elems[-1]
    print(f"Setting memoization for {subset}")
    obj.setMemoization(subset)
    zeroEval = obj.evaluate(subset)
    print(f"evaluate(subset) = {zeroEval}")
    firstEval = obj.evaluateWithMemoization(subset)
    print(f"evaluateWithMemoization(subset) = {firstEval}")
    print(f"Element to be added is {elem}")
    subset.add(elem)
    secondEval = obj.evaluate(subset)
    print(f"evaluate(subset++) = {secondEval}")
    # Naive gain: difference between f(S + e) and the memoized f(S).
    naiveGain = secondEval - firstEval
    print("naiveGain:", naiveGain)
    subset.remove(elem)
    simpleGain = obj.marginalGain(subset, elem)
    print(f"gain(subset,elem) = {simpleGain}")
    fastGain = obj.marginalGainWithMemoization(subset, elem)
    print(f"gainFast(subset,elem) = {fastGain}")
def test():
    """Compare FacilityLocation(mode="clustered") with the ClusteredFunction wrapper.

    Builds a small blob dataset, forms one same-cluster subset (set1) and one
    cross-cluster subset (set2), then prints evaluate / marginalGain /
    memoized variants / greedy maximization for both objective constructions.
    """
    num_clusters = 3
    cluster_std_dev = 1
    num_samples = 8
    num_set = 3
    budget = 4
    points, cluster_ids, centers = make_blobs(
        n_samples=num_samples,
        centers=num_clusters,
        n_features=2,
        cluster_std=cluster_std_dev,
        center_box=(0, 100),
        return_centers=True,
        random_state=4,
    )
    data = list(map(tuple, points))
    xs = [x[0] for x in data]
    ys = [x[1] for x in data]
    # get num_set data points belonging to cluster#1
    random.seed(1)
    cluster1Indices = [index for index, val in enumerate(cluster_ids) if val == 1]
    subset1 = random.sample(cluster1Indices, num_set)
    subset1xs = [xs[x] for x in subset1]
    subset1ys = [ys[x] for x in subset1]
    # plt.scatter(xs, ys, s=25, color='black', label="Images")
    # plt.scatter(subset1xs, subset1ys, s=25, color='red', label="Subset1")
    # plt.show()
    set1 = set(subset1[:-1])
    # get num_set data points belonging to different clusters
    subset2 = []
    for i in range(num_set):
        # find the index of first point that belongs to cluster i
        diverse_index = cluster_ids.tolist().index(i)
        subset2.append(diverse_index)
    subset2xs = [xs[x] for x in subset2]
    subset2ys = [ys[x] for x in subset2]
    # plt.scatter(xs, ys, s=25, color='black', label="Images")
    # plt.scatter(subset2xs, subset2ys, s=25, color='red', label="Subset2")
    # plt.show()
    set2 = set(subset2[:-1])
    dataArray = np.array(data)

    from submodlib.functions.facilityLocation import FacilityLocationFunction
    # start = time.process_time()
    obj5 = FacilityLocationFunction(n=num_samples, data=dataArray, mode="clustered", metric="euclidean", num_clusters=num_clusters)
    # print(f"Time taken by instantiation = {time.process_time() - start}")
    print(f"Subset 1's FL value = {obj5.evaluate(set1)}")
    print(f"Subset 2's FL value = {obj5.evaluate(set2)}")
    print(f"Gain of adding another point ({subset1[-1]}) of same cluster to {set1} = {obj5.marginalGain(set1, subset1[-1])}")
    print(f"Gain of adding another point ({subset2[-1]}) of different cluster to {set1} = {obj5.marginalGain(set1, subset2[-1])}")
    obj5.setMemoization(set1)
    # FIX: this f-string literal was split across two physical lines in the
    # source (a newline is invalid inside an f-string); rejoined to match the
    # identical obj7 print below.
    print(f"Subset 1's Fast FL value = {obj5.evaluateWithMemoization(set1)}")
    print(f"Fast gain of adding another point ({subset1[-1]}) of same cluster to {set1} = {obj5.marginalGainWithMemoization(set1, subset1[-1])}")
    # start = time.process_time()
    greedyList = obj5.maximize(budget=budget, optimizer='NaiveGreedy', stopIfZeroGain=False, stopIfNegativeGain=False, verbose=False)
    print(f"Greedy vector: {greedyList}")
    # print(f"Time taken by maximization = {time.process_time() - start}")
    # greedyXs = [xs[x[0]] for x in greedyList]
    # greedyYs = [ys[x[0]] for x in greedyList]
    # plt.scatter(xs, ys, s=25, color='black', label="Images")
    # plt.scatter(greedyXs, greedyYs, s=25, color='blue', label="Greedy Set")

    from submodlib import ClusteredFunction
    obj7 = ClusteredFunction(n=num_samples, mode="multi", f_name='FacilityLocation', metric='euclidean', data=dataArray, num_clusters=num_clusters)
    # print(f"Time taken by instantiation = {time.process_time() - start}")
    print(f"Subset 1's FL value = {obj7.evaluate(set1)}")
    print(f"Subset 2's FL value = {obj7.evaluate(set2)}")
    print(f"Gain of adding another point ({subset1[-1]}) of same cluster to {set1} = {obj7.marginalGain(set1, subset1[-1])}")
    print(f"Gain of adding another point ({subset2[-1]}) of different cluster to {set1} = {obj7.marginalGain(set1, subset2[-1])}")
    obj7.setMemoization(set1)
    print(f"Subset 1's Fast FL value = {obj7.evaluateWithMemoization(set1)}")
    print(f"Fast gain of adding another point ({subset1[-1]}) of same cluster to {set1} = {obj7.marginalGainWithMemoization(set1, subset1[-1])}")
    # start = time.process_time()
    greedyList = obj7.maximize(budget=budget, optimizer='NaiveGreedy', stopIfZeroGain=False, stopIfNegativeGain=False, verbose=False)
    print(f"Greedy vector: {greedyList}")
[2,20,6, 2000], [12,20,68, 200] ]) '''
# NOTE(review): the line above is the tail of a triple-quoted (commented-out)
# alternative dataset whose opening ''' lies outside this chunk — confirm
# against the full file before editing this region.
# 43 points in 2-D: four 3x3 grids (around x≈5/x≈15, y≈13/y≈7) plus 7 extras.
data = np.array([(4.5, 13.5), (5, 13.5), (5.5, 13.5), (14.5, 13.5), (15, 13.5), (15.5, 13.5), (4.5, 13), (5, 13), (5.5, 13), (14.5, 13), (15, 13), (15.5, 13), (4.5, 12.5), (5, 12.5), (5.5, 12.5), (14.5, 12.5), (15, 12.5), (15.5, 12.5), (4.5, 7.5), (5, 7.5), (5.5, 7.5), (14.5, 7.5), (15, 7.5), (15.5, 7.5), (4.5, 7), (5, 7), (5.5, 7), (14.5, 7), (15, 7), (15.5, 7), (4.5, 6.5), (5, 6.5), (5.5, 6.5), (14.5, 6.5), (15, 6.5), (15.5, 6.5), (7.5, 10), (12.5, 10), (10, 12.5), (10, 7.5), (4.5, 15.5), (5, 9.5), (5, 10.5)])
#a, b, c = create_cluster(data.tolist(), 'euclidean', num_cluster = 2)
#print(a)
#print(c)
#print(b)
#obj = ClusteredFunction(43, 'FacilityLocation', a, b, c)
# Clustered FacilityLocation over all 43 points; clustering is left to the
# library's defaults (no mode/metric/labels passed).
obj = ClusteredFunction(n=43, f_name='FacilityLocation', data=data)
X = {1, 23, 4}
print(obj.evaluate(X))
X = {1, 23, 4}
print(obj.marginalGain(X, 0))
#print(obj.maximize('NaiveGreedy', 10, False, False, False))
# Positional args presumably map to (budget=10, optimizer='NaiveGreedy',
# stopIfZeroGain, stopIfNegativeGain, verbose), matching the keyword calls
# elsewhere in this file — TODO confirm against maximize()'s signature.
print(obj.maximize(10, 'NaiveGreedy', False, False, False))
def create_fl_clustered_user_single():
    """Construct a clustered FacilityLocation objective from caller-supplied cluster labels.

    NOTE(review): the name says "single" but mode="multi" is passed — confirm intent.
    Uses module-level fixtures: num_samples, dataArray, num_clusters, cluster_ids.
    """
    labels = cluster_ids.tolist()
    return ClusteredFunction(
        n=num_samples,
        mode="multi",
        f_name='FacilityLocation',
        data=dataArray,
        metric="euclidean",
        num_clusters=num_clusters,
        cluster_lab=labels,
    )
def create_fl_clustered_birch_single():
    """Construct a single-kernel clustered FacilityLocation objective.

    No cluster labels are passed, so clustering is presumably done internally
    (BIRCH, per the name) — TODO confirm against ClusteredFunction docs.
    Uses module-level fixtures: num_samples, dataArray, num_clusters.
    """
    return ClusteredFunction(
        n=num_samples,
        mode="single",
        f_name='FacilityLocation',
        data=dataArray,
        metric="euclidean",
        num_clusters=num_clusters,
    )
def fl_clustered_user_single():
    """Build a clustered FacilityLocation from user-supplied labels and run greedy maximization.

    Benchmark-style entry point; the maximization result is discarded.
    Uses module-level fixtures: num_samples, dataArray, num_clusters,
    cluster_ids, budget, optimizer.
    """
    fn = ClusteredFunction(
        n=num_samples,
        mode="multi",
        f_name='FacilityLocation',
        data=dataArray,
        metric="euclidean",
        num_clusters=num_clusters,
        cluster_lab=cluster_ids.tolist(),
    )
    fn.maximize(
        budget=budget,
        optimizer=optimizer,
        stopIfZeroGain=False,
        stopIfNegativeGain=False,
        verbose=False,
    )
def fl_clustered_birch_single():
    """Build a single-kernel clustered FacilityLocation (no labels) and run greedy maximization.

    Benchmark-style entry point; the maximization result is discarded.
    Uses module-level fixtures: num_samples, dataArray, num_clusters,
    budget, optimizer.
    """
    fn = ClusteredFunction(
        n=num_samples,
        mode="single",
        f_name='FacilityLocation',
        data=dataArray,
        metric="euclidean",
        num_clusters=num_clusters,
    )
    fn.maximize(
        budget=budget,
        optimizer=optimizer,
        stopIfZeroGain=False,
        stopIfNegativeGain=False,
        verbose=False,
    )