def test_neg1(self):
    """Asking for more neighbors than there are data points must be rejected.

    Four data points are supplied while ``num_neigh=6`` is requested; the
    call is expected to raise with a specific error message.
    """
    data = np.array([[0, 1, 3], [5, 1, 5], [10, 2, 6], [12, 20, 68]])
    try:
        create_kernel(data, 'sparse', 'cosine', num_neigh=6)
    except Exception as e:
        assert str(e) == "ERROR: num of neighbors can't be more than no of datapoints"
    else:
        # Without this branch the test would pass silently when the call
        # does not raise at all, hiding a missing validation.
        raise AssertionError(
            "create_kernel did not raise for num_neigh=6 with 4 data points"
        )
def test_sparse_euclidean(self, data):
    """Compare the sparse euclidean kernel against an sklearn-derived reference.

    The reference is ``exp(-distance * gamma)`` with ``gamma`` equal to the
    reciprocal of the size of the second axis of ``data``, stored as CSR.
    """
    pairwise = euclidean_distances(data)
    num_cols = np.shape(data)[1]
    gamma = 1 / num_cols
    # sklearn ground truth, converted to CSR before densifying for comparison
    expected = sparse.csr_matrix(np.exp(-pairwise * gamma))

    _, kernel = create_kernel(data, 'sparse', 'euclidean')

    actual_dense = kernel.todense()
    expected_dense = expected.todense()
    assert np.allclose(actual_dense, expected_dense)
def test_4_6(self):
    """Constructing FacilityLocationFunction with ``n=0`` must be rejected.

    A valid sparse kernel is built first so that the only invalid argument
    passed to the constructor is the ground-set size.
    """
    data = np.array([[1, 2], [3, 4]])
    num_neigh, M = create_kernel(data, 'sparse', 'euclidean', num_neigh=1)
    try:
        FacilityLocationFunction(n=0, sijs=M, num_neigh=num_neigh)
    except Exception as e:
        assert str(e) == "ERROR: Number of elements in ground set can't be 0"
    else:
        # Without this branch the test would pass silently when the
        # constructor accepts n=0.
        raise AssertionError("FacilityLocationFunction did not raise for n=0")
def fl_dense_py_kernel():
    """Build a dense euclidean kernel, wrap it in a FacilityLocationFunction and maximize.

    Reads ``dataArray``, ``num_samples``, ``budget`` and ``optimizer`` from
    the enclosing module scope. Nothing is returned.
    """
    kernel = create_kernel(dataArray, mode='dense', metric='euclidean')
    facility_location = FacilityLocationFunction(
        n=num_samples,
        mode="dense",
        sijs=kernel,
        separate_rep=False,
    )
    facility_location.maximize(
        budget=budget,
        optimizer=optimizer,
        stopIfZeroGain=False,
        stopIfNegativeGain=False,
        verbose=False,
    )
def test_4_5(self):
    """A sparse kernel passed without ``num_neigh`` must be rejected.

    The kernel is created with ``num_neigh=1`` but the constructor is called
    without that value; the call is expected to raise with a specific message.
    """
    data = np.array([[1, 2], [3, 4]])
    num_neigh, M = create_kernel(data, 'sparse', 'euclidean', num_neigh=1)
    try:
        # It is important for the user to pass num_neigh with a sparse matrix
        # because otherwise there is no way for Python FL and C++ FL to know
        # how many nearest neighbours were retained in the sparse matrix.
        FacilityLocationFunction(n=2, sijs=M)
    except Exception as e:
        assert str(e) == "ERROR: num_neigh for given sparse matrix not provided"
    else:
        # Without this branch the test would pass silently when the
        # constructor accepts a sparse matrix lacking num_neigh.
        raise AssertionError(
            "FacilityLocationFunction did not raise for a sparse matrix "
            "without num_neigh"
        )
def test_neg3(self):
    """An unknown metric name must be rejected by ``create_kernel``.

    The metric is deliberately misspelled as ``'cosinee'``; the call is
    expected to raise with a specific error message.
    """
    data = np.array([[0, 1, 3], [5, 1, 5], [10, 2, 6], [12, 20, 68]])
    try:
        create_kernel(data, 'sparse', 'cosinee', num_neigh=3)
    except Exception as e:
        assert str(e) == "ERROR: unsupported metric"
    else:
        # Without this branch the test would pass silently when the
        # misspelled metric is accepted.
        raise AssertionError("create_kernel did not raise for metric 'cosinee'")
def py_dense_kernel():
    """Return the dense euclidean kernel of the module-level ``dataArray``."""
    return create_kernel(dataArray, mode='dense', metric='euclidean')
def test_cosine_neigh3(self, data, val):
    """Check the sparse cosine kernel (fourth positional argument 3) against ``val``."""
    _, kernel = create_kernel(data, 'sparse', 'cosine', 3)
    dense_kernel = kernel.todense()
    assert np.allclose(dense_kernel, val)
def test_euclidean_neigh3(self, data, val):
    """Check the sparse euclidean kernel (fourth positional argument 3) against ``val``."""
    _, kernel = create_kernel(data, 'sparse', 'euclidean', 3)
    dense_kernel = kernel.todense()
    assert np.allclose(dense_kernel, val)
def test_sparse_cosine(self, data):
    """Compare the sparse cosine kernel against sklearn's cosine similarity."""
    # sklearn ground truth, converted to CSR before densifying for comparison
    expected = sparse.csr_matrix(cosine_similarity(data))

    _, kernel = create_kernel(data, 'sparse', 'cosine')

    actual_dense = kernel.todense()
    expected_dense = expected.todense()
    assert np.allclose(actual_dense, expected_dense)
def test_euclidean_neigh2(self, data, val):
    """Check the sparse euclidean kernel built with ``num_neigh=2`` against ``val``."""
    kernel = create_kernel(
        data,
        mode='sparse',
        metric='euclidean',
        num_neigh=2,
    )
    dense_kernel = kernel.todense()
    assert np.allclose(dense_kernel, val)
def test_dense_cosine(self, data):
    """Compare the dense cosine kernel against sklearn's cosine similarity."""
    expected = cosine_similarity(data)  # sklearn ground truth
    _, kernel = create_kernel(data, 'dense', 'cosine')
    assert np.allclose(kernel, expected)
query_features.append(tuple(points[q_ind])) pointsMinusQuery.remove(tuple(points[q_ind])) # get a subset with num_set data points set1 = set(random.sample(range(num_samples - num_queries), num_set)) imageData = np.array(pointsMinusQuery) queryData = np.array(query_features) return (num_samples - num_queries, num_queries, imageData, queryData, set1) num_data, num_q, imageData, queryData, _ = data_queries() print("Image data: ", imageData) print("Query data: ", queryData) imageKernel = create_kernel(imageData, mode="dense", metric=metric) queryKernel = create_kernel(queryData, mode="dense", metric=metric, X_rep=imageData) queryQueryKernel = create_kernel(queryData, mode="dense", metric=metric) print("Image-Image Kernel: ", imageKernel) print("Image-Query Kernel: ", queryKernel) print("Query-Query Kernel: ", queryQueryKernel) obj = LogDeterminantMutualInformationFunction( n=num_data, num_queries=num_q, data_sijs=imageKernel, query_sijs=queryKernel, query_query_sijs=queryQueryKernel, lambdaVal=logDetLambdaVal,
def py_dense_kernel(dataArray):
    """Return the dense euclidean kernel computed from ``dataArray``."""
    # Debug aid, left disabled:
    #print("Calling py dense kernel with ", dataArray.shape[0], " elements in ground set")
    return create_kernel(dataArray, mode='dense', metric='euclidean')
import numpy as np
#import submodlib_cpp as subcp
import submodlib.helper as helper

# Small ground set of three 2-D points used to compare kernel methods.
groundData = np.array([(4.5, 13.5), (5, 13.5), (5.5, 13.5)])

# Longer method list, left disabled:
#methods = ["sklearn", "fastdist", "scipy", "rowwise", "np", "np_numba", "other"]
methods = ["sklearn", "rowwise", "np"]

# Print the "dot" kernel produced by each method in turn.
for method in methods:
    print("\n***Kernel from ", method)
    kernel = helper.create_kernel(
        groundData,
        metric="dot",
        method=method,
    )
    print(kernel)

# Same loop for the "cosine" metric, left disabled:
# for method in methods:
#     print("\n***Kernel from ", method)
#     kernel = helper.create_kernel(groundData, metric="cosine", method=method)
#     print(kernel)
#A dryrun of implemented code with dummy data import numpy as np from submodlib.functions.facilityLocation import FacilityLocationFunction from submodlib.helper import create_kernel data = np.array([[1, 2, 3], [3, 4, 5], [4, 5, 6]]) #dryrun of create_kernel n_, K_dense = create_kernel(data, 'dense', 'euclidean') print(K_dense) n_, K_sparse = create_kernel(data, 'sparse', 'euclidean', num_neigh=2) print(K_sparse) #dryrun of C++ FL and Python FL when user provides similarity matrix #1) with dense matrix obj = FacilityLocationFunction(n=3, sijs=K_dense) X = {1} print(obj.evaluate(X)) X = {1, 2} print(obj.evaluate(X)) X = {1} print(obj.marginalGain(X, 2)) #2) with sparse matrix obj = FacilityLocationFunction(n=3, sijs=K_sparse, num_neigh=2) #dryrun of C++ FL and Python FL when user provides data #1) with dense mode obj = FacilityLocationFunction(n=3, data=data, mode="dense",
def test_neg3(self):
    """An incorrect metric name (``'cosinee'``) must make ``create_kernel`` raise."""
    points = np.array([[0, 1, 3], [5, 1, 5], [10, 2, 6], [12, 20, 68]])
    with pytest.raises(Exception):
        create_kernel(
            points,
            mode='sparse',
            metric='cosinee',
            num_neigh=3,
        )
def test_neg1(self):
    """Requesting 6 neighbors for 4 data points must make ``create_kernel`` raise."""
    points = np.array([[0, 1, 3], [5, 1, 5], [10, 2, 6], [12, 20, 68]])
    with pytest.raises(Exception):
        create_kernel(
            points,
            mode='sparse',
            metric='cosine',
            num_neigh=6,
        )
def test_cosine_neigh3(self, data, val):
    """Check the sparse cosine kernel built with ``num_neigh=3`` against ``val``."""
    kernel = create_kernel(
        data,
        mode='sparse',
        metric='cosine',
        num_neigh=3,
    )
    dense_kernel = kernel.todense()
    assert np.allclose(dense_kernel, val)
def fl_dense_py_kernel_other_array():
    """Build a dense euclidean kernel with ``method="other"`` and maximize over it.

    The kernel is handed to FacilityLocationFunction with
    ``pybind_mode="array"``. Reads ``dataArray``, ``num_samples``, ``budget``
    and ``optimizer`` from the enclosing module scope. Nothing is returned.
    """
    kernel = helper.create_kernel(
        dataArray,
        mode="dense",
        metric='euclidean',
        method="other",
    )
    facility_location = FacilityLocationFunction(
        n=num_samples,
        mode="dense",
        sijs=kernel,
        separate_rep=False,
        pybind_mode="array",
    )
    facility_location.maximize(
        budget=budget,
        optimizer=optimizer,
        stopIfZeroGain=False,
        stopIfNegativeGain=False,
        verbose=False,
        show_progress=False,
    )
def test_dense_euclidean(self, data):
    """Compare the dense euclidean kernel against an sklearn-derived reference.

    The reference is ``exp(-distance * gamma)`` with ``gamma`` equal to the
    reciprocal of the size of the second axis of ``data``.
    """
    pairwise = euclidean_distances(data)
    num_cols = np.shape(data)[1]
    gamma = 1 / num_cols
    expected = np.exp(-pairwise * gamma)  # sklearn ground truth

    _, kernel = create_kernel(data, 'dense', 'euclidean')
    assert np.allclose(kernel, expected)
def py_sparse_kernel(dataArray, num_neighbors):
    """Return the sparse euclidean kernel of ``dataArray`` built with ``num_neigh=num_neighbors``."""
    return create_kernel(
        dataArray,
        mode='sparse',
        metric='euclidean',
        num_neigh=num_neighbors,
    )
def fl_sparse_py_kernel():
    """Build a sparse euclidean kernel, wrap it in a FacilityLocationFunction and maximize.

    Reads ``dataArray``, ``num_neighbors``, ``num_samples``, ``budget`` and
    ``optimizer`` from the enclosing module scope. Nothing is returned.
    """
    kernel = helper.create_kernel(
        dataArray,
        mode='sparse',
        metric='euclidean',
        num_neigh=num_neighbors,
    )
    facility_location = FacilityLocationFunction(
        n=num_samples,
        mode="sparse",
        sijs=kernel,
        num_neighbors=num_neighbors,
    )
    facility_location.maximize(
        budget=budget,
        optimizer=optimizer,
        stopIfZeroGain=False,
        stopIfNegativeGain=False,
        verbose=False,
    )