Exemplo n.º 1
0
def FL_sparse(data):  # Case 1: the caller supplies only the raw data
    """Build a sparse-mode facility-location function and maximize it.

    The ground-set size (43), the neighbour count (10) and the arguments
    handed to ``maximize`` are hard-coded. Nothing is returned; the result
    of ``maximize`` is discarded.
    """
    settings = dict(n=43,
                    data=data,
                    mode="sparse",
                    metric="euclidean",
                    num_neigh=10)
    fl = FacilityLocationFunction(**settings)
    # Arguments stay positional on purpose; presumably they are the budget,
    # the optimizer name and three boolean flags -- confirm against the
    # installed maximize() signature before converting to keywords.
    greedy_args = (10, 'NaiveGreedy', False, False, False)
    fl.maximize(*greedy_args)
Exemplo n.º 2
0
def FL_clustered_case1(data):  # Case 1: the caller supplies only the raw data
    """Build a clustered-mode facility-location function and maximize it.

    No cluster labels are passed here, only ``num_cluster=10``. The
    ground-set size (43) and the ``maximize`` arguments are hard-coded, and
    the result of ``maximize`` is discarded.
    """
    settings = dict(n=43,
                    data=data,
                    mode="clustered",
                    metric="euclidean",
                    num_cluster=10)
    fl = FacilityLocationFunction(**settings)
    # Kept positional; presumably budget, optimizer name and three flags.
    greedy_args = (10, 'NaiveGreedy', False, False, False)
    fl.maximize(*greedy_args)
Exemplo n.º 3
0
 def test_4_3(self):  # X not a subset of ground set for evaluate()
     """evaluate() must reject a set that is not a subset of the ground set.

     The function is built with n=2, and X contains the element 2. The test
     fails if no exception is raised or if the message differs.
     """
     M = np.array([[1, 2], [3, 4]])
     obj = FacilityLocationFunction(n=2, sijs=M)
     X = {0, 2}
     try:
         obj.evaluate(X)
     except Exception as e:
         assert str(e) == "ERROR: X is not a subset of ground set"
     else:
         # Previously the test passed silently when nothing was raised.
         raise AssertionError(
             "evaluate() did not raise for X outside the ground set")
Exemplo n.º 4
0
 def test_4_4(self):  # X not a subset of ground set for marginalGain()
     """marginalGain() must reject a set that is not a subset of the ground set.

     The function is built with n=2, and X contains the element 2. The test
     fails if no exception is raised or if the message differs.
     """
     M = np.array([[1, 2], [3, 4]])
     obj = FacilityLocationFunction(n=2, sijs=M)
     X = {0, 2}
     try:
         obj.marginalGain(X, 1)
     except Exception as e:
         assert str(e) == "ERROR: X is not a subset of ground set"
     else:
         # Previously the test passed silently when nothing was raised.
         raise AssertionError(
             "marginalGain() did not raise for X outside the ground set")
Exemplo n.º 5
0
def FL_clustered_case2(data, lab):  # Case 2: cluster info is supplied too
    """Build a clustered-mode facility-location function from data and labels.

    ``lab`` is forwarded as ``cluster_lab``. The ground-set size (43),
    ``num_cluster=10`` and the ``maximize`` arguments are hard-coded, and
    the result of ``maximize`` is discarded.
    """
    settings = dict(n=43,
                    data=data,
                    cluster_lab=lab,
                    mode="clustered",
                    metric="euclidean",
                    num_cluster=10)
    fl = FacilityLocationFunction(**settings)
    # Kept positional; presumably budget, optimizer name and three flags.
    greedy_args = (10, 'NaiveGreedy', False, False, False)
    fl.maximize(*greedy_args)
Exemplo n.º 6
0
def fl_dense_cpp_kernel():
    """Build a dense facility-location function from raw data and maximize it.

    No precomputed kernel is passed; the constructor receives ``dataArray``
    and a metric. Reads ``num_samples``, ``dataArray``, ``budget`` and
    ``optimizer`` from the enclosing module. Returns nothing.
    """
    fl = FacilityLocationFunction(
        n=num_samples, mode="dense", data=dataArray, metric="euclidean")
    run_options = {
        "budget": budget,
        "optimizer": optimizer,
        "stopIfZeroGain": False,
        "stopIfNegativeGain": False,
        "verbose": False,
    }
    fl.maximize(**run_options)
Exemplo n.º 7
0
def fl_dense_py_kernel():
    """Maximize a dense facility-location function over a precomputed kernel.

    The kernel comes from ``create_kernel`` and is passed as ``sijs``. Reads
    ``num_samples``, ``dataArray``, ``budget`` and ``optimizer`` from the
    enclosing module. Returns nothing.
    """
    kernel = create_kernel(dataArray, mode='dense', metric='euclidean')
    fl = FacilityLocationFunction(
        n=num_samples, mode="dense", sijs=kernel, separate_rep=False)
    run_options = {
        "budget": budget,
        "optimizer": optimizer,
        "stopIfZeroGain": False,
        "stopIfNegativeGain": False,
        "verbose": False,
    }
    fl.maximize(**run_options)
def fl_dense_py_kernel_np_numba_array64():
    """Maximize a dense facility-location function with ``pybind_mode="array64"``.

    The kernel comes from ``helper.create_kernel_dense_np_numba`` and is
    passed as ``sijs``. Reads ``num_samples``, ``dataArray``, ``budget`` and
    ``optimizer`` from the enclosing module. Returns nothing.
    """
    kernel = helper.create_kernel_dense_np_numba(dataArray, 'euclidean')
    fl = FacilityLocationFunction(
        n=num_samples,
        mode="dense",
        sijs=kernel,
        separate_rep=False,
        pybind_mode="array64",
    )
    run_options = {
        "budget": budget,
        "optimizer": optimizer,
        "stopIfZeroGain": False,
        "stopIfNegativeGain": False,
        "verbose": False,
    }
    fl.maximize(**run_options)
Exemplo n.º 9
0
def f_3():  # more realistic test case
    """Return a FacilityLocationFunction over a fixed 5x5 similarity matrix.

    The matrix mixes positive and negative fractional entries.
    """
    rows = [
        [-0.78569, 0.75, 0.9, -0.56, 0.005],
        [0.00006, 0.400906, -0.203, 0.9765, -0.9],
        [0.1, 0.3, 0.5, 0.0023, 0.9],
        [-0.1, 0.1, 0.1405, 0.0023, 0.3],
        [-0.123456, 0.0789, 0.00456, 0.001, -0.9],
    ]
    return FacilityLocationFunction(n=5, sijs=np.array(rows))
Exemplo n.º 10
0
 def test_4_2(self):  # Inconsistency between n and no of examples in M
     """The constructor must reject n=1 when given a 2x3 similarity matrix.

     The test fails if no exception is raised or if the message differs.
     """
     M = np.array([[1, 2, 3], [4, 5, 6]])
     try:
         FacilityLocationFunction(n=1, sijs=M)
     except Exception as e:
         # The spelling "Inconsistentcy" is kept as-is; presumably it mirrors
         # the message emitted by the library.
         assert str(
             e
         ) == "ERROR: Inconsistentcy between n and no of examples in the given similarity matrix"
     else:
         # Previously the test passed silently when nothing was raised.
         raise AssertionError(
             "constructor did not raise for n inconsistent with the matrix")
Exemplo n.º 11
0
 def test_4_1(self):  # Non-square dense similarity matrix
     """The constructor must reject a non-square (2x3) dense similarity matrix.

     The test fails if no exception is raised or if the message differs.
     """
     M = np.array([[1, 2, 3], [4, 5, 6]])
     try:
         FacilityLocationFunction(n=2, sijs=M)
     except Exception as e:
         assert str(
             e
         ) == "ERROR: Dense similarity matrix should be a square matrix if ground and master datasets are same"
     else:
         # Previously the test passed silently when nothing was raised.
         raise AssertionError(
             "constructor did not raise for a non-square dense matrix")
Exemplo n.º 12
0
def f_5():
    """Return a dense, cosine-metric FacilityLocationFunction over 5 fixed points.

    Each point has four integer features; the kernel is not precomputed here,
    the raw data is handed to the constructor.
    """
    points = [
        [100, 21, 365, 5],
        [57, 18, -5, -6],
        [16, 255, 68, -8],
        [2, 20, 6, 2000],
        [12, 20, 68, 200],
    ]
    return FacilityLocationFunction(n=5,
                                    data=np.array(points),
                                    mode="dense",
                                    metric="cosine")
Exemplo n.º 13
0
 def test_4_6(self):  # n==0
     """The constructor must reject an empty ground set (n=0).

     A sparse kernel is built from two points with ``num_neigh=1`` and passed
     together with the neighbour count returned by ``create_kernel``. The
     test fails if no exception is raised or if the message differs.
     """
     data = np.array([[1, 2], [3, 4]])
     num_neigh, M = create_kernel(data, 'sparse', 'euclidean', num_neigh=1)
     try:
         FacilityLocationFunction(n=0, sijs=M, num_neigh=num_neigh)
     except Exception as e:
         assert str(
             e) == "ERROR: Number of elements in ground set can't be 0"
     else:
         # Previously the test passed silently when nothing was raised.
         raise AssertionError("constructor did not raise for n == 0")
Exemplo n.º 14
0
def test():
    """Run LazyGreedy facility-location maximization on a small blob data set.

    Generates 9 two-feature points around 3 centers with ``make_blobs``,
    builds a dense euclidean FacilityLocationFunction from them, maximizes
    with a budget of 4 and prints the result. Returns nothing.
    """
    # Problem size; the trailing values are alternatives left by the author.
    num_clusters = 3  # 10, 100, 3
    cluster_std_dev = 1  # 4, 4, 1
    num_samples = 9  # 500, 5000, 9
    budget = 4  # 10, 10, 4

    blob_output = make_blobs(
        n_samples=num_samples,
        centers=num_clusters,
        n_features=2,
        cluster_std=cluster_std_dev,
        center_box=(0, 100),
        return_centers=True,
        random_state=4,
    )
    points, cluster_ids, centers = blob_output
    data = [tuple(point) for point in points]
    xs = [coords[0] for coords in data]
    ys = [coords[1] for coords in data]

    dataArray = np.array(data)

    from submodlib.functions.facilityLocation import FacilityLocationFunction
    obj = FacilityLocationFunction(
        n=num_samples, mode="dense", data=dataArray, metric="euclidean")
    print("Testing FacilityLocation's maximize")

    # The author also tried 'NaiveGreedy', 'StochasticGreedy' and
    # 'LazierThanLazyGreedy' here (and DisparitySumFunction in place of
    # FacilityLocationFunction) with the same remaining arguments.
    run_options = {
        "budget": budget,
        "optimizer": 'LazyGreedy',
        "stopIfZeroGain": False,
        "stopIfNegativeGain": False,
        "verbose": False,
    }
    greedyList = obj.maximize(**run_options)
    print(f"Greedy vector: {greedyList}")
    # The first item of every greedyList entry is used as an index into xs/ys.
    greedyXs = [xs[entry[0]] for entry in greedyList]
    greedyYs = [ys[entry[0]] for entry in greedyList]
Exemplo n.º 15
0
def f_7():
    """Return a clustered, euclidean FacilityLocationFunction over 5 fixed points.

    The constructor is asked for 2 clusters via ``num_cluster``; no cluster
    labels are supplied.
    """
    points = [
        [100, 21, 365, 5],
        [57, 18, -5, -6],
        [16, 255, 68, -8],
        [2, 20, 6, 2000],
        [12, 20, 68, 200],
    ]
    settings = dict(n=5,
                    data=np.array(points),
                    mode="clustered",
                    metric="euclidean",
                    num_cluster=2)
    return FacilityLocationFunction(**settings)
Exemplo n.º 16
0
 def test_4_5(self):
     """A sparse matrix passed without ``num_neigh`` must be rejected.

     It is important for the user to pass ``num_neigh`` with a sparse matrix
     because otherwise there is no way for the Python FL and the C++ FL to
     know how many nearest neighbours were retained in the sparse matrix.
     The test fails if no exception is raised or if the message differs.
     """
     data = np.array([[1, 2], [3, 4]])
     # The neighbour count returned by create_kernel is deliberately not
     # forwarded to the constructor below.
     _num_neigh, M = create_kernel(data, 'sparse', 'euclidean', num_neigh=1)
     try:
         FacilityLocationFunction(n=2, sijs=M)
     except Exception as e:
         assert str(
             e) == "ERROR: num_neigh for given sparse matrix not provided"
     else:
         # Previously the test passed silently when nothing was raised.
         raise AssertionError(
             "constructor did not raise for a sparse matrix without num_neigh")
Exemplo n.º 17
0
def create_fl_dense_py_kernel(num_samples, pyDenseKernel):
    """Return a dense FacilityLocationFunction built on a precomputed kernel.

    ``pyDenseKernel`` is passed as ``sijs`` and ``separate_rep`` is False.
    """
    settings = {
        "n": num_samples,
        "mode": "dense",
        "sijs": pyDenseKernel,
        "separate_rep": False,
    }
    return FacilityLocationFunction(**settings)
Exemplo n.º 18
0
def fl_dense_py_kernel_other_array():
    """Maximize a dense facility-location function with ``pybind_mode="array"``.

    The kernel comes from ``helper.create_kernel`` with ``method="other"``
    and is passed as ``sijs``. Reads ``num_samples``, ``dataArray``,
    ``budget`` and ``optimizer`` from the enclosing module. Returns nothing.
    """
    kernel = helper.create_kernel(
        dataArray, mode="dense", metric='euclidean', method="other")
    fl = FacilityLocationFunction(
        n=num_samples,
        mode="dense",
        sijs=kernel,
        separate_rep=False,
        pybind_mode="array",
    )
    run_options = {
        "budget": budget,
        "optimizer": optimizer,
        "stopIfZeroGain": False,
        "stopIfNegativeGain": False,
        "verbose": False,
        "show_progress": False,
    }
    fl.maximize(**run_options)
Exemplo n.º 19
0
def create_fl_dense_cpp_kernel(num_samples, dataArray):
    """Return a dense, euclidean FacilityLocationFunction built from raw data.

    No precomputed kernel is passed; ``dataArray`` is handed to the
    constructor as ``data``.
    """
    settings = {
        "n": num_samples,
        "mode": "dense",
        "data": dataArray,
        "metric": "euclidean",
    }
    return FacilityLocationFunction(**settings)
Exemplo n.º 20
0
def test():
    """Compare clustered FacilityLocationFunction with ClusteredFunction.

    Generates 8 two-feature points around 3 centers, derives one subset
    drawn from a single cluster and one drawn across clusters, then runs the
    same evaluate / marginal-gain / memoization / NaiveGreedy sequence on
    both objects and prints every result. Returns nothing.
    """
    num_clusters = 3
    cluster_std_dev = 1
    num_samples = 8
    num_set = 3
    budget = 4

    points, cluster_ids, centers = make_blobs(
        n_samples=num_samples,
        centers=num_clusters,
        n_features=2,
        cluster_std=cluster_std_dev,
        center_box=(0, 100),
        return_centers=True,
        random_state=4,
    )
    data = [tuple(point) for point in points]
    xs = [coords[0] for coords in data]
    ys = [coords[1] for coords in data]

    # Draw num_set points that all carry cluster label 1.
    random.seed(1)
    cluster1Indices = [
        idx for idx, label in enumerate(cluster_ids) if label == 1
    ]
    subset1 = random.sample(cluster1Indices, num_set)
    subset1xs = [xs[idx] for idx in subset1]
    subset1ys = [ys[idx] for idx in subset1]
    # The last sampled point is held back as the marginal-gain candidate.
    set1 = set(subset1[:-1])

    # For each label in range(num_set), take the first point with that label.
    labels = cluster_ids.tolist()
    subset2 = [labels.index(label) for label in range(num_set)]
    subset2xs = [xs[idx] for idx in subset2]
    subset2ys = [ys[idx] for idx in subset2]
    set2 = set(subset2[:-1])

    dataArray = np.array(data)

    def _report(fn):
        # Shared sequence of calls applied to each function object in turn.
        print(f"Subset 1's FL value = {fn.evaluate(set1)}")
        print(f"Subset 2's FL value = {fn.evaluate(set2)}")
        print(f"Gain of adding another point ({subset1[-1]}) of same cluster to {set1} = {fn.marginalGain(set1, subset1[-1])}")
        print(f"Gain of adding another point ({subset2[-1]}) of different cluster to {set1} = {fn.marginalGain(set1, subset2[-1])}")
        fn.setMemoization(set1)
        print(f"Subset 1's Fast FL value = {fn.evaluateWithMemoization(set1)}")
        print(f"Fast gain of adding another point ({subset1[-1]}) of same cluster to {set1} = {fn.marginalGainWithMemoization(set1, subset1[-1])}")
        selection = fn.maximize(budget=budget,
                                optimizer='NaiveGreedy',
                                stopIfZeroGain=False,
                                stopIfNegativeGain=False,
                                verbose=False)
        print(f"Greedy vector: {selection}")
        return selection

    from submodlib.functions.facilityLocation import FacilityLocationFunction

    obj5 = FacilityLocationFunction(n=num_samples,
                                    data=dataArray,
                                    mode="clustered",
                                    metric="euclidean",
                                    num_clusters=num_clusters)
    greedyList = _report(obj5)

    from submodlib import ClusteredFunction

    obj7 = ClusteredFunction(n=num_samples,
                             mode="multi",
                             f_name='FacilityLocation',
                             metric='euclidean',
                             data=dataArray,
                             num_clusters=num_clusters)
    greedyList = _report(obj7)
Exemplo n.º 21
0
import pytest
import numpy as np
from scipy import sparse
import scipy
from submodlib.functions.facilityLocation import FacilityLocationFunction
from submodlib.helper import create_kernel
from submodlib_cpp import FacilityLocation

# Five sample points with four features each.
data = np.array([
    [100, 21, 365, 5],
    [57, 18, -5, -6],
    [16, 255, 68, -8],
    [2, 20, 6, 2000],
    [12, 20, 68, 200],
])

# Not referenced by the statements below.
s = {1}

# Sparse-mode, cosine-metric function built directly from the raw data.
obj = FacilityLocationFunction(
    n=5,
    data=data,
    mode="sparse",
    metric="cosine",
)

# Arguments stay positional; presumably budget, optimizer name and three
# boolean flags -- confirm against the installed maximize() signature.
print(
    obj.maximize(3, 'NaiveGreedy', False, False, False)
)
Exemplo n.º 22
0
def create_fl_mode_user():
    """Return a clustered FacilityLocationFunction with caller-supplied labels.

    Reads ``num_samples``, ``dataArray``, ``num_clusters`` and
    ``cluster_ids`` from the enclosing module; the labels are passed as a
    plain list via ``cluster_ids.tolist()``.
    """
    return FacilityLocationFunction(
        n=num_samples,
        mode="clustered",
        data=dataArray,
        metric="euclidean",
        num_clusters=num_clusters,
        cluster_labels=cluster_ids.tolist(),
    )
Exemplo n.º 23
0
def create_fl_mode_birch():
    """Return a clustered FacilityLocationFunction without supplying labels.

    Only ``num_clusters`` is passed, so clustering is left to the library
    (the function name suggests BIRCH -- not verifiable from this code).
    Reads ``num_samples``, ``dataArray`` and ``num_clusters`` from the
    enclosing module.
    """
    return FacilityLocationFunction(
        n=num_samples,
        mode="clustered",
        data=dataArray,
        metric="euclidean",
        num_clusters=num_clusters,
    )
Exemplo n.º 24
0
#A dryrun of implemented code with dummy data
import numpy as np
from submodlib.functions.facilityLocation import FacilityLocationFunction
from submodlib.helper import create_kernel

data = np.array([
    [1, 2, 3],
    [3, 4, 5],
    [4, 5, 6],
])

# Dry run of create_kernel: build and print a dense kernel and a
# 2-neighbour sparse kernel. The first item of each returned pair is not
# used in the visible part of this script.
n_, K_dense = create_kernel(data, 'dense', 'euclidean')
print(K_dense)
n_, K_sparse = create_kernel(data, 'sparse', 'euclidean', num_neigh=2)
print(K_sparse)

# Dry run of the function when the user provides the similarity matrix.
# 1) dense matrix: evaluate two sets, then one marginal gain.
obj = FacilityLocationFunction(n=3, sijs=K_dense)
for X in ({1}, {1, 2}):
    print(obj.evaluate(X))
X = {1}
print(obj.marginalGain(X, 2))

# 2) sparse matrix: the neighbour count is passed alongside the matrix.
obj = FacilityLocationFunction(n=3, sijs=K_sparse, num_neigh=2)

#dryrun of C++ FL and Python FL when user provides data
#1) with dense mode
obj = FacilityLocationFunction(n=3,
                               data=data,
                               mode="dense",
Exemplo n.º 25
0
def f_1():  # A simple, easy to calculate test case
    """Return a FacilityLocationFunction over a fixed 3x3 integer matrix."""
    rows = [
        [1, 3, 2],
        [5, 4, 3],
        [4, 7, 5],
    ]
    return FacilityLocationFunction(n=3, sijs=np.array(rows))
Exemplo n.º 26
0
def create_fl_sparse_cpp_kernel(num_samples, dataArray, num_neighbors):
    """Return a sparse, euclidean FacilityLocationFunction built from raw data.

    No precomputed kernel is passed; ``dataArray`` goes in as ``data`` and
    ``num_neighbors`` is forwarded unchanged.
    """
    settings = {
        "n": num_samples,
        "mode": "sparse",
        "data": dataArray,
        "metric": "euclidean",
        "num_neighbors": num_neighbors,
    }
    return FacilityLocationFunction(**settings)
Exemplo n.º 27
0
def f_2():  # Boundary case of just one element
    """Return a FacilityLocationFunction with n=1 over a single value.

    The similarity input is a one-element 1-D array, not a 1x1 matrix.
    """
    single = np.array([-0.78569])
    return FacilityLocationFunction(n=1, sijs=single)
Exemplo n.º 28
0
def FL_case2(M):  # Case 2: the caller directly provides the kernel
    """Build a facility-location function from a ready-made kernel and maximize it.

    ``M`` is passed as ``sijs`` together with ``num_neigh=10``. The
    ground-set size (43) and the ``maximize`` arguments are hard-coded, and
    the result of ``maximize`` is discarded.
    """
    fl = FacilityLocationFunction(n=43, sijs=M, num_neigh=10)
    # Kept positional; presumably budget, optimizer name and three flags.
    greedy_args = (10, 'NaiveGreedy', False, False, False)
    fl.maximize(*greedy_args)
Exemplo n.º 29
0
def create_fl_sparse_py_kernel(num_samples, pySparseKernel, num_neighbors):
    """Return a sparse FacilityLocationFunction built on a precomputed kernel.

    ``pySparseKernel`` is passed as ``sijs`` and ``num_neighbors`` is
    forwarded unchanged.
    """
    settings = {
        "n": num_samples,
        "mode": "sparse",
        "sijs": pySparseKernel,
        "num_neighbors": num_neighbors,
    }
    return FacilityLocationFunction(**settings)
Exemplo n.º 30
0
def fl_mode_user():
    """Maximize a clustered facility-location function with supplied labels.

    Reads ``num_samples``, ``dataArray``, ``num_clusters``, ``cluster_ids``,
    ``budget`` and ``optimizer`` from the enclosing module. Returns nothing.
    """
    fl = FacilityLocationFunction(
        n=num_samples,
        mode="clustered",
        data=dataArray,
        metric="euclidean",
        num_clusters=num_clusters,
        cluster_labels=cluster_ids.tolist(),
    )
    run_options = {
        "budget": budget,
        "optimizer": optimizer,
        "stopIfZeroGain": False,
        "stopIfNegativeGain": False,
        "verbose": False,
    }
    fl.maximize(**run_options)