Exemplo n.º 1
0
def svm_chi2(training, labels, test, real):
    """Fit a precomputed chi-squared-kernel SVM and print its test accuracy.

    ``training`` and ``labels`` are used for fitting; ``test`` and ``real``
    are the held-out samples and their true labels. Nothing is returned.
    """
    gamma = 0.02
    # Gram matrix of the training samples against themselves.
    train_gram = chi2_kernel(training, gamma=gamma)
    # Rows are test samples, columns are training samples.
    test_gram = chi2_kernel(test, training, gamma=gamma)

    classifier = SVC(C=1, kernel='precomputed', max_iter=-1)
    classifier.fit(train_gram, labels)
    print(classifier.score(test_gram, real))
Exemplo n.º 2
0
def test_chi_square_kernel():
    """Exercise ``chi2_kernel`` and ``additive_chi2_kernel``.

    Covers agreement with a direct element-wise computation, the unit
    diagonal of a self-kernel, dtype handling (float32 kept, int32 converted
    to float), all-zero input, relative ordering of similar and dissimilar
    rows, and the ValueError expected for negative values, mismatched
    feature counts and sparse input.
    """
    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    Y = rng.random_sample((10, 4))
    K_add = additive_chi2_kernel(X, Y)
    gamma = 0.1
    K = chi2_kernel(X, Y, gamma=gamma)
    # np.float was a plain alias of the builtin float and has been removed
    # from NumPy; np.float64 names the same dtype explicitly.
    assert_equal(K.dtype, np.float64)
    for i, x in enumerate(X):
        for j, y in enumerate(Y):
            chi2 = -np.sum((x - y) ** 2 / (x + y))
            chi2_exp = np.exp(gamma * chi2)
            assert_almost_equal(K_add[i, j], chi2)
            assert_almost_equal(K[i, j], chi2_exp)

    # check diagonal is ones for data with itself
    K = chi2_kernel(Y)
    assert_array_equal(np.diag(K), 1)
    # check off-diagonal is < 1 but > 0:
    assert_true(np.all(K > 0))
    assert_true(np.all(K - np.diag(np.diag(K)) < 1))

    # check that float32 is preserved
    X = rng.random_sample((5, 4)).astype(np.float32)
    Y = rng.random_sample((10, 4)).astype(np.float32)
    K = chi2_kernel(X, Y)
    assert_equal(K.dtype, np.float32)

    # check integer type gets converted,
    # check that zeros are handled
    # (samples drawn from [0, 1) all truncate to 0 when cast to int32)
    X = rng.random_sample((10, 4)).astype(np.int32)
    K = chi2_kernel(X, X)
    assert_true(np.isfinite(K).all())
    assert_equal(K.dtype, np.float64)

    # check that kernel of similar things is greater than dissimilar ones
    X = [[.3, .7], [1., 0]]
    Y = [[0, 1], [.9, .1]]
    K = chi2_kernel(X, Y)
    assert_greater(K[0, 0], K[0, 1])
    assert_greater(K[1, 1], K[1, 0])

    # test negative input
    assert_raises(ValueError, chi2_kernel, [[0, -1]])
    assert_raises(ValueError, chi2_kernel, [[0, -1]], [[-1, -1]])
    assert_raises(ValueError, chi2_kernel, [[0, 1]], [[-1, -1]])

    # different n_features in X and Y
    assert_raises(ValueError, chi2_kernel, [[0, 1]], [[.2, .2, .6]])

    # sparse matrices
    assert_raises(ValueError, chi2_kernel, csr_matrix(X), csr_matrix(Y))
    assert_raises(ValueError, additive_chi2_kernel,
                  csr_matrix(X), csr_matrix(Y))
Exemplo n.º 3
0
def test_chi_square_kernel():
    """Exercise ``chi2_kernel`` and ``additive_chi2_kernel``.

    Checks the kernels against a direct element-wise computation, then the
    unit diagonal of a self-kernel, dtype handling (float32 kept, int32
    converted to float), all-zero input, ordering of similar versus
    dissimilar rows, and the ValueError expected for negative values,
    mismatched feature counts and sparse input.
    """
    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    Y = rng.random_sample((10, 4))
    K_add = additive_chi2_kernel(X, Y)
    gamma = 0.1
    K = chi2_kernel(X, Y, gamma=gamma)
    # The np.float alias no longer exists in NumPy; np.float64 is the dtype
    # it used to resolve to.
    assert_equal(K.dtype, np.float64)
    for i, x in enumerate(X):
        for j, y in enumerate(Y):
            chi2 = -np.sum((x - y) ** 2 / (x + y))
            chi2_exp = np.exp(gamma * chi2)
            assert_almost_equal(K_add[i, j], chi2)
            assert_almost_equal(K[i, j], chi2_exp)

    # check diagonal is ones for data with itself
    K = chi2_kernel(Y)
    assert_array_equal(np.diag(K), 1)
    # check off-diagonal is < 1 but > 0:
    assert np.all(K > 0)
    assert np.all(K - np.diag(np.diag(K)) < 1)
    # check that float32 is preserved
    X = rng.random_sample((5, 4)).astype(np.float32)
    Y = rng.random_sample((10, 4)).astype(np.float32)
    K = chi2_kernel(X, Y)
    assert_equal(K.dtype, np.float32)

    # check integer type gets converted,
    # check that zeros are handled
    # (samples drawn from [0, 1) all truncate to 0 when cast to int32)
    X = rng.random_sample((10, 4)).astype(np.int32)
    K = chi2_kernel(X, X)
    assert np.isfinite(K).all()
    assert_equal(K.dtype, np.float64)

    # check that kernel of similar things is greater than dissimilar ones
    X = [[.3, .7], [1., 0]]
    Y = [[0, 1], [.9, .1]]
    K = chi2_kernel(X, Y)
    assert_greater(K[0, 0], K[0, 1])
    assert_greater(K[1, 1], K[1, 0])

    # test negative input
    assert_raises(ValueError, chi2_kernel, [[0, -1]])
    assert_raises(ValueError, chi2_kernel, [[0, -1]], [[-1, -1]])
    assert_raises(ValueError, chi2_kernel, [[0, 1]], [[-1, -1]])

    # different n_features in X and Y
    assert_raises(ValueError, chi2_kernel, [[0, 1]], [[.2, .2, .6]])

    # sparse matrices
    assert_raises(ValueError, chi2_kernel, csr_matrix(X), csr_matrix(Y))
    assert_raises(ValueError, additive_chi2_kernel,
                  csr_matrix(X), csr_matrix(Y))
Exemplo n.º 4
0
def svm_with_cv(data, labels):
    """Train a chi2-kernel SVM and a logistic regression on its outputs.

    The SVM's ``C`` comes from ``grid_cv``. The logistic regression is fit
    on the SVM decision values of the training data so that those values can
    later be turned into probabilities.

    Returns the tuple ``(svm_clf, lr_clf)``.
    """
    best_params, best_cv_score = grid_cv(data, labels)
    chosen_c = best_params['clf__C']
    if ARGS.verbose:
        print('CV:', best_cv_score, chosen_c)

    train_gram = chi2_kernel(data)
    svm_clf = SVC(C=chosen_c, kernel='precomputed').fit(train_gram, labels)

    # Decision values as a single-feature column for the logistic regression.
    decision_column = svm_clf.decision_function(train_gram).reshape(-1, 1)

    lr_clf = LogisticRegression()
    lr_clf.fit(decision_column, labels)

    if ARGS.verbose:
        print("LR:", np.mean(labels == lr_clf.predict(decision_column)))

    return svm_clf, lr_clf
Exemplo n.º 5
0
    def _get_kernel_matrix(self, X1, X2):
        """Return the kernel matrix between ``X1`` and ``X2``.

        The kernel is selected by ``self._kernel``; ``self._gamma``,
        ``self._degree`` and ``self._coef0`` are forwarded to the kernels
        that take them. The result has len(X1) rows and len(X2) columns.
        For an unrecognised kernel name an error line is printed and
        ``None`` is returned.
        """
        kernel = self._kernel
        if kernel == 'rbf':
            return pairwise.rbf_kernel(X1, X2, gamma=self._gamma)
        if kernel == 'poly':
            return pairwise.polynomial_kernel(
                X1, X2,
                degree=self._degree, gamma=self._gamma, coef0=self._coef0)
        if kernel == 'linear':
            return pairwise.linear_kernel(X1, X2)
        if kernel == 'laplacian':
            return pairwise.laplacian_kernel(X1, X2, gamma=self._gamma)
        if kernel == 'chi2':
            return pairwise.chi2_kernel(X1, X2, gamma=self._gamma)
        if kernel == 'additive_chi2':
            return pairwise.additive_chi2_kernel(X1, X2)
        if kernel == 'sigmoid':
            return pairwise.sigmoid_kernel(
                X1, X2, gamma=self._gamma, coef0=self._coef0)

        print('[Error] Unknown kernel')
        return None
Exemplo n.º 6
0
 def process_similarity(self, similarity):
     """Fill ``self._similarity_matrix`` using the named similarity.

     ``"dot"`` replaces the whole matrix with the dense product of the
     training ratings with their transpose. Every other supported name
     writes only the strict upper triangle (``k=1``) of the existing matrix:
     ``"cosine"`` stores the cosine similarity directly, all remaining names
     store ``1 / (1 + value)`` of the corresponding pairwise matrix.

     Raises ``Exception`` for an unsupported name.
     """
     ratings = self._data.sp_i_train_ratings

     if similarity == "dot":
         self._similarity_matrix = (ratings @ ratings.T).toarray()
         return

     if similarity == "cosine":
         scores = cosine_similarity(ratings)
     elif similarity == "euclidean":
         scores = 1 / (1 + euclidean_distances(ratings))
     elif similarity == "manhattan":
         scores = 1 / (1 + manhattan_distances(ratings))
     elif similarity == "haversine":
         scores = 1 / (1 + haversine_distances(ratings))
     elif similarity == "chi2":
         # chi2_kernel rejects sparse input, so densify first, as the
         # scipy-metric branch below already does.
         # NOTE(review): chi2_kernel returns a similarity, not a distance;
         # confirm that 1 / (1 + kernel) is the intended score.
         scores = 1 / (1 + chi2_kernel(ratings.toarray()))
     elif similarity in ['cityblock', 'l1', 'l2']:
         scores = 1 / (1 + pairwise_distances(ratings, metric=similarity))
     elif similarity in ['braycurtis', 'canberra', 'chebyshev', 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule']:
         scores = 1 / (1 + pairwise_distances(ratings.toarray(), metric=similarity))
     else:
         raise Exception("Not implemented similarity")

     # Only the strict upper triangle is written; the rest of the matrix
     # keeps whatever it held before.
     rows, cols = np.triu_indices(self._similarity_matrix.shape[0], k=1)
     self._similarity_matrix[rows, cols] = scores[rows, cols]
Exemplo n.º 7
0
    def _apply_kernel(self, x, y):
        """Apply the selected kernel function to the data.

        ``self.kernel`` is either one of the names ``'linear'``, ``'rbf'``,
        ``'poly'``, ``'sigmoid'``, ``'chi2'``, ``'laplacian'`` or a callable
        taking ``(x, y)``. A callable must return a 2D matrix with one row
        per row of ``x``. When ``self.bias_used`` is true a column of ones
        is appended to the result.

        Raises ``ValueError`` for an unknown kernel or a badly shaped result
        from a custom kernel.
        """
        if self.kernel == 'linear':
            phi = linear_kernel(x, y)
        elif self.kernel == 'rbf':
            # coef1 is passed positionally in rbf_kernel's gamma slot.
            phi = rbf_kernel(x, y, self.coef1)
        elif self.kernel == 'poly':
            phi = polynomial_kernel(x, y, self.degree, self.coef1, self.coef0)
        elif self.kernel == 'sigmoid':
            coef0 = self.coef0 if self.coef0 is not None else 1
            phi = sigmoid_kernel(x, y, self.gamma, coef0)
        elif self.kernel == 'chi2':
            # Pass the defaulted value: forwarding self.gamma directly would
            # hand None to chi2_kernel whenever gamma is unset.
            gamma = self.gamma if self.gamma is not None else 1
            phi = chi2_kernel(x, y, gamma)
        elif self.kernel == 'laplacian':
            phi = laplacian_kernel(x, y, self.gamma)
        elif callable(self.kernel):
            phi = self.kernel(x, y)
            if len(phi.shape) != 2:
                raise ValueError(
                    "Custom kernel function did not return 2D matrix")
            if phi.shape[0] != x.shape[0]:
                raise ValueError(
                    "Custom kernel function did not return matrix with rows"
                    " equal to number of data points."
                    "")
        else:
            raise ValueError("Kernel selection is invalid.")

        if self.bias_used:
            phi = np.append(phi, np.ones((phi.shape[0], 1)), axis=1)

        return phi
def myKernel(x, y):
    """Combine chi-squared kernels on histogram columns with an RBF kernel.

    Columns 0-24, 25-32 and 33-40 each get their own chi-squared kernel
    (gamma 1) and the three are summed; columns 41 onward get an RBF kernel
    with gamma 1/40. The elapsed time of each stage is printed, and the sum
    of the histogram and RBF kernels is returned.
    """
    gamma_hist = 1
    first_hist = slice(None, 25)
    second_hist = slice(25, 33)
    third_hist = slice(33, 41)

    started = time()
    hist_kernel = chi2_kernel(x[:, first_hist], y[:, first_hist], gamma_hist)
    hist_kernel += chi2_kernel(x[:, second_hist], y[:, second_hist], gamma_hist)
    hist_kernel += chi2_kernel(x[:, third_hist], y[:, third_hist], gamma_hist)
    print(time() - started, flush=True)

    started = time()
    rbf_kern = rbf_kernel(x[:, 41:], y[:, 41:], 1/40)
    print(time() - started, flush=True)

    return hist_kernel + rbf_kern
def cross_validate(X, y):
    """Print 10-fold cross-validation scores for several SVM variants.

    Linear, LinearSVC, RBF and polynomial SVMs are scored on ``X`` directly.
    Two precomputed-kernel SVMs are scored on the histogram-intersection and
    chi-squared Gram matrices of ``X``. For every model the estimator, the
    per-fold scores and a "mean (+/- 2 std)" line are printed.
    """
    svc = svm.SVC(kernel='linear', C=0.0625)
    lin_svc = svm.LinearSVC(C=4.0, dual=False)
    rbf_svc = svm.SVC(kernel='rbf', gamma=0.0009765625, C=32.0)
    poly_svc = svm.SVC(kernel='poly', degree=2, C=2048.0)

    hist_svc = svm.SVC(kernel='precomputed')
    chi2_svc = svm.SVC(kernel='precomputed')

    def report_scores(estimator, features):
        # Score one estimator and print the per-fold and summary results.
        scores = cross_val_score(estimator, features, y, cv=10)
        print(scores)
        print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

    # 10-fold cross validation on the raw features
    for model in [svc, lin_svc, rbf_svc, poly_svc]:
        print(model)
        report_scores(model, X)

    # The Gram matrices must be scored with the precomputed-kernel
    # estimators, not with whatever model the loop above ended on.
    print(hist_svc)
    report_scores(hist_svc, hist_intersection(X, X))

    print(chi2_svc)
    report_scores(chi2_svc, chi2_kernel(X, gamma=0.3))
Exemplo n.º 10
0
Arquivo: main.py Projeto: asmolik/snr
def evaluate_chi2_kernel(data, gamma, C):
    """Train and evaluate a precomputed chi-squared-kernel SVM.

    ``data`` unpacks into ``(train_data, train_labels, test_data,
    test_labels)``. Training time, a classification report and the overall
    accuracy are sent to ``log``; nothing is returned.
    """
    train_data, train_labels, test_data, test_labels = data
    log.message('Training chi^2 kernel ...')

    started_at = time.time()
    classifier = svm.SVC(kernel='precomputed', C=C)
    classifier.fit(chi2_kernel(train_data, gamma=gamma), train_labels)
    log.time(time.time() - started_at)

    log.message('Evaluating ...')
    # Test rows against training columns, matching the fitted Gram matrix.
    predicted_labels = classifier.predict(
        chi2_kernel(test_data, train_data, gamma=gamma))
    log.message(classification_report(test_labels, predicted_labels))

    hits = np.count_nonzero(
        np.array(test_labels) == np.array(predicted_labels))
    log.accuracy(hits / len(test_labels))
Exemplo n.º 11
0
def cross_validate(X, y):
    """Print 10-fold cross-validation scores for several SVM variants.

    Linear, LinearSVC, RBF and polynomial SVMs are scored on ``X`` directly.
    Two precomputed-kernel SVMs are scored on the histogram-intersection and
    chi-squared Gram matrices of ``X``. For every model the estimator, the
    per-fold scores and a "mean (+/- 2 std)" line are printed.
    """
    svc = svm.SVC(kernel='linear', C=0.0625)
    lin_svc = svm.LinearSVC(C=4.0, dual=False)
    rbf_svc = svm.SVC(kernel='rbf', gamma=0.0009765625, C=32.0)
    poly_svc = svm.SVC(kernel='poly', degree=2, C=2048.0)

    hist_svc = svm.SVC(kernel='precomputed')
    chi2_svc = svm.SVC(kernel='precomputed')

    def report_scores(estimator, features):
        # Score one estimator and print the per-fold and summary results.
        scores = cross_val_score(estimator, features, y, cv=10)
        print(scores)
        print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

    # 10-fold cross validation on the raw features
    for model in [svc, lin_svc, rbf_svc, poly_svc]:
        print(model)
        report_scores(model, X)

    # The Gram matrices must be scored with the precomputed-kernel
    # estimators, not with whatever model the loop above ended on.
    print(hist_svc)
    report_scores(hist_svc, hist_intersection(X, X))

    print(chi2_svc)
    report_scores(chi2_svc, chi2_kernel(X, gamma=0.3))
Exemplo n.º 12
0
 def createMatrixLocLoc(self):
     """Build one rank-sum row per location against all locations.

     For location ``i`` and each model in ``Task5().Models`` a score is
     computed against every location ``j``: the sum over rows of the minimum
     entry of the pairwise matrix between the two locations' feature columns
     (column 0 is skipped), or 0 when either location has no rows. The
     scores of each model are converted to ranks with a double argsort and
     the ranks are summed over models. Prints ``i`` after each row and
     returns the list of rows.
     """
     euclidean_models = ["CM3x3", "GLRLM", "CN3x3", "CN", "CM"]
     chi2_models = ["LBP", "GLRLM3x3", "LBP3x3"]
     cosine_models = ["HOG", "CSD"]

     task = Task5()
     matrix = []
     for i in range(1, len(task.locations) + 1):
         scores_per_model = []
         for x in task.Models:
             scores = []
             for j in range(1, len(task.locations) + 1):
                 rows_i = task.searchFirstFile(i, x)
                 rows_j = task.searchFirstFile(j, x)
                 if len(rows_i) == 0 or len(rows_j) == 0:
                     scores.append(0)
                 else:
                     feats_i = np.array(rows_i)
                     feats_j = np.array(rows_j)
                     # No fallback branch: a model outside these three
                     # groups leaves dist as it was.
                     if x in euclidean_models:
                         dist = euclidean_distances(feats_i[:, 1:], feats_j[:, 1:])
                     elif x in chi2_models:
                         dist = chi2_kernel(feats_i[:, 1:], feats_j[:, 1:])
                     elif x in cosine_models:
                         dist = cosine_distances(feats_i[:, 1:], feats_j[:, 1:])
                     row_minima = dist.min(axis=1)
                     scores.append(np.sum(np.array(row_minima)))
             scores_per_model.append(scores)
         score_table = np.array(scores_per_model)
         # argsort of argsort gives each location's rank within a model.
         order = np.argsort(score_table, axis=1)
         ranks = np.argsort(order, axis=1)
         matrix.append(np.sum(ranks, axis=0))
         print(i)
     return matrix
Exemplo n.º 13
0
def build_train_kernels(categories, datamanager):
    """Build one chi-squared training kernel per category.

    For each category the training sample matrix is fetched from
    ``datamanager``, a gamma is estimated with ``approximate_gamma`` and the
    kernel is computed with ``1.0 / gamma``.

    Returns ``(kernels, gammas)``, two lists aligned with ``categories``;
    ``gammas`` holds the estimates themselves, not their reciprocals.
    """
    kernels, gammas = [], []
    for category in categories:
        samples = datamanager.build_sample_matrix("train", category)
        estimate = approximate_gamma(samples)
        gammas.append(estimate)
        kernels.append(chi2_kernel(samples, samples, gamma=1.0 / estimate))
    return kernels, gammas
Exemplo n.º 14
0
def histogram_similarity(hist1, hist2):
    """Return the chi-squared kernel between two molecule histograms.

    Args:
    hist1: molecule histogram

    hist2: molecule histogram

    Both arguments are handed unchanged to ``pairwise.chi2_kernel`` with its
    default gamma.
    """
    similarity = pairwise.chi2_kernel(hist1, hist2)
    return similarity
Exemplo n.º 15
0
def build_train_kernels(categories, datamanager):
    """Build one chi-squared training kernel per category.

    For each category the training sample matrix is fetched from
    ``datamanager``, a gamma is estimated with ``approximate_gamma`` and the
    kernel is computed with ``1.0 / gamma``.

    Returns ``(kernels, gammas)``, two lists aligned with ``categories``;
    ``gammas`` holds the estimates themselves, not their reciprocals.
    """
    kernels, gammas = [], []
    for category in categories:
        samples = datamanager.build_sample_matrix("train", category)
        estimate = approximate_gamma(samples)
        gammas.append(estimate)
        kernels.append(chi2_kernel(samples, samples, gamma=1.0 / estimate))
    return kernels, gammas
Exemplo n.º 16
0
def svms(trainData, testData, trainOutcomes):
    """Return test-set class probabilities from three SVM variants.

    A linear SVM, a default-kernel SVM and a precomputed chi-squared-kernel
    SVM are fit in that order on the training data, all with balanced class
    weights and probability estimates enabled.

    Returns ``(svm_linear_posterior, svm_rbf_posterior, chi2svm_posterior)``.
    """
    shared = dict(class_weight='balanced', probability=True)

    linear_model = SVC(kernel='linear', **shared)
    linear_model.fit(trainData, trainOutcomes)
    svm_linear_posterior = linear_model.predict_proba(testData)

    rbf_model = SVC(**shared)
    rbf_model.fit(trainData, trainOutcomes)
    svm_rbf_posterior = rbf_model.predict_proba(testData)

    # Gram matrices: train against train for fitting, test against train
    # for prediction.
    train_gram = chi2_kernel(trainData, trainData)
    test_gram = chi2_kernel(testData, trainData)

    chi2_model = SVC(kernel='precomputed', **shared)
    chi2_model.fit(train_gram, trainOutcomes)
    chi2svm_posterior = chi2_model.predict_proba(test_gram)

    return svm_linear_posterior, svm_rbf_posterior, chi2svm_posterior
Exemplo n.º 17
0
    def similarityEuclidean(self, LocationId, k):
        """Print the ``k`` best-ranked locations relative to ``LocationId``.

        For every model in ``Task5().Models`` and every location ``j`` in
        1..30 other than ``LocationId``, a value is computed as the sum over
        rows of the minimum entry of the pairwise matrix between the two
        locations' feature columns (column 0 is skipped). Each model's
        entries are sorted by ascending value; a location's rank is the sum
        of its 1-based positions across models. The ``k`` entries with the
        smallest rank are printed together with their per-model values.
        Nothing is returned.
        """
        t = Task5()
        listofmax = []
        distSum = {}
        listofmaximages = []

        for x in t.Models:
            for j in range(1, 31):
                if j != LocationId:
                    list1 = t.searchFirstFile(LocationId, x)
                    list2 = t.searchFirstFile(j, x)
                    df = np.array(list1)
                    df2 = np.array(list2)
                    # No fallback branch: a model outside these three groups
                    # leaves dist unset or holding the previous matrix.
                    if x in ["CM3x3", "GLRLM", "CN3x3", "CN", "CM"]:
                        dist = euclidean_distances(df[:, 1:], df2[:, 1:])
                    elif x in ["LBP", "GLRLM3x3", "LBP3x3"]:
                        # NOTE(review): chi2_kernel is a similarity kernel
                        # while the other branches are distances - confirm
                        # that taking the row minimum is intended here.
                        dist = chi2_kernel(df[:, 1:], df2[:, 1:])
                    elif x in ["HOG", "CSD"]:
                        dist = cosine_distances(df[:, 1:], df2[:, 1:])
                    # Despite the name, max holds the column index of each
                    # row's minimum (and shadows the builtin).
                    max = dist.argmin(axis=1)
                    s = 0
                    for i in range(len(df)):
                        s += dist[i, max[i]]
                    distSum = {}
                    distSum["value"] = s
                    distSum["Model"] = x
                    distSum["Location"] = j
                    listofmaximages.append(distSum)
            # One list per model, sorted by ascending value.
            listofmax.append(sorted(listofmaximages, key=lambda k: k["value"]))
            listofmaximages = []

        # map and list shadow the builtins within this method.
        map = {}
        # NOTE(review): i runs over 0..30 while stored locations are 1..30,
        # and the rank for location i is stored under key i + 1. i = 0 never
        # matches a location, so key 1 gets rank 0 and sorts first - this
        # looks like an off-by-one; confirm against Task5.locations indexing.
        for i in range(31):
            if i != LocationId:
                rank = 0
                for list in listofmax:
                    count = 1
                    for x in list:
                        if i == x["Location"]:
                            rank += count
                            break
                        else:
                            count += 1
                map[i + 1] = rank
        # (key, rank) pairs ordered by ascending rank.
        sorted_dict = sorted(map.items(), key=operator.itemgetter(1))
        for i in range(k):
            print("Mached Locations")
            print(t.locations[sorted_dict[i][0]])
            for list in listofmax:
                for x in list:
                    if x["Location"] == sorted_dict[i][0]:
                        print("Model: ", end="")
                        print(x["Model"])
                        print("Value", end="")
                        print(x["value"])
def calc_gaussian_sim(data_matrix, method):
    """Return the pairwise kernel matrix of ``data_matrix`` with itself.

    ``method`` selects the kernel: ``"rbf"``, ``"chi2"``, ``"laplacian"`` or
    ``"sigmoid"``, each called with its default parameters. Any other value
    raises ``ValueError``.
    """
    if method == "rbf":
        similarity = rbf_kernel(data_matrix)
    elif method == "chi2":
        similarity = chi2_kernel(data_matrix)
    elif method == "laplacian":
        similarity = laplacian_kernel(data_matrix)
    elif method == "sigmoid":
        similarity = sigmoid_kernel(data_matrix)
    else:
        raise ValueError("Wron method parameter ind calc_gaussian_sim()")
    return similarity
 def transform(self, X, Y):
     """Return the first row of the kernel matrix between ``X`` and ``Y``.

     ``self.type`` selects the kernel. ``'AChi2'`` returns the negated row
     of the additive chi-squared kernel. An unrecognised type falls through
     and returns ``None``.
     """
     kind = self.type
     if kind == 'rbf':
         kernel_matrix = rbf_kernel(X, Y, self.gamma)
         return kernel_matrix[0]
     if kind == 'Chi2':
         kernel_matrix = chi2_kernel(X, Y, self.gamma)
         return kernel_matrix[0]
     if kind == 'AChi2':
         first_row = additive_chi2_kernel(X, Y)[0]
         return -first_row
     if kind == 'laplacian':
         kernel_matrix = laplacian_kernel(X, Y, self.gamma)
         return kernel_matrix[0]
     if kind == 'sigmoid':
         kernel_matrix = sigmoid_kernel(X, Y, self.gamma, self.coef0)
         return kernel_matrix[0]
Exemplo n.º 20
0
    def __init__(self, kernel_name='rbf', type='classification'):
        """Store the kernel name and task type and build the kernel table.

        ``kernel_dict`` maps each supported kernel name to a callable taking
        ``x`` and an optional ``y`` and returning that kernel evaluated with
        its default parameters.
        """
        self.kernel_dict = dict(
            rbf=lambda x, y=None: rbf_kernel(x, y),
            linear=lambda x, y=None: linear_kernel(x, y),
            add_chi2=lambda x, y=None: additive_chi2_kernel(x, y),
            chi2=lambda x, y=None: chi2_kernel(x, y),
            poly=lambda x, y=None: polynomial_kernel(x, y),
            laplace=lambda x, y=None: laplacian_kernel(x, y),
        )
        self.type = type
        self.kernel_name = kernel_name
Exemplo n.º 21
0
def calculateMultipleKernel(x, y):
    """Return a randomly weighted sum of eight kernels on one sample pair.

    ``x`` and ``y`` are each flattened into a single-row matrix. Every
    kernel family contributes three terms; each term consumes one value of
    ``theta`` as its weight, followed by as many values as the kernel takes
    extra positional parameters.
    """
    theta = random.sample(range(1,47),46) # given a random theta for now

    # Both inputs are treated as one sample each.
    x = np.array(x).reshape(1, -1)
    y = np.array(y).reshape(1, -1)

    # (kernel function, number of theta values passed as extra arguments)
    kernel_plan = (
        (additive_chi2_kernel, 0),
        (chi2_kernel, 1),
        (cosine_similarity, 0),
        (linear_kernel, 0),
        (polynomial_kernel, 3),
        (rbf_kernel, 1),
        (laplacian_kernel, 1),
        (sigmoid_kernel, 1),
    )

    kernelResult = 0
    index = 0
    for kernel_fn, n_params in kernel_plan:
        for _ in range(3):
            extra = theta[index + 1:index + 1 + n_params]
            kernelResult += theta[index] * kernel_fn(x, y, *extra)
            index += 1 + n_params

    return kernelResult
Exemplo n.º 22
0
    def coding_unified(self, codebook_, feats_):
        """Encode features against a codebook with hard or soft assignment.

        Works on copies of both inputs. ``self.coding_poling`` selects the
        mode: if it contains ``'hard'``, each feature row is assigned to the
        codeword with the smallest cosine distance (one-hot integer output);
        if it contains ``'soft'``, each row gets chi-squared kernel
        responses to every codeword (gamma ``1 / (2 * self.variance)``),
        normalised to sum to one. Before comparison each row of features and
        codebook is shifted so that its minimum is zero.

        Returns an array of shape ``feats.shape[:2] + (codebook_size,)``.
        Raises ``ValueError`` for any other coding mode.
        """
        feats = feats_.copy()
        codebook = codebook_.copy()
        # feats = np.concatenate(feats)

        if self.debug:
            print('\t- coding features ...')
            sys.stdout.flush()

        if 'hard' in self.coding_poling:

            # The np.int alias was removed from NumPy; the builtin int is
            # the type it referred to.
            coded_feats = np.zeros((feats.shape[:2] + (self.codebook_size, )),
                                   dtype=int)

            feats = feats.reshape(feats.shape[0], feats.shape[1], -1)
            idxs_cuboid = np.arange(feats.shape[1])

            codebook -= codebook.min(axis=1).reshape(-1, 1)

            for sample in range(feats.shape[0]):
                feats[sample] -= feats[sample].min(axis=1).reshape(-1, 1)
                idxs = np.argmin(pairwise_distances(feats[sample],
                                                    codebook,
                                                    metric="cosine"),
                                 axis=1)
                coded_feats[sample, idxs_cuboid, idxs] = 1

        elif 'soft' in self.coding_poling:

            # Likewise np.float is gone; the builtin float is equivalent.
            coded_feats = np.zeros((feats.shape[:2] + (self.codebook_size, )),
                                   dtype=float)
            beta = 1.0 / (2.0 * self.variance)
            codebook -= codebook.min(axis=1).reshape(-1, 1)

            for sample in range(feats.shape[0]):
                feats[sample] -= feats[sample].min(axis=1).reshape(-1, 1)
                coded_feats[sample] = chi2_kernel(feats[sample],
                                                  codebook,
                                                  gamma=beta)

                # Guard against division by zero for all-zero rows.
                cfnorm = coded_feats[sample].sum(axis=1).reshape(-1, 1)
                cfnorm[cfnorm == 0] = 1.
                coded_feats[sample] /= cfnorm

        else:
            raise ValueError('Coding method not implemented')

        return coded_feats
Exemplo n.º 23
0
def calculateMultipleKernel(x, y):
    """Return a randomly weighted sum of eight kernels on one sample pair.

    ``x`` and ``y`` are each flattened into a single-row matrix. Every
    kernel family contributes three terms; each term consumes one value of
    ``theta`` as its weight, followed by as many values as the kernel takes
    extra positional parameters.
    """
    theta = random.sample(range(1, 47), 46)  # given a random theta for now

    # Both inputs are treated as one sample each.
    x = np.array(x).reshape(1, -1)
    y = np.array(y).reshape(1, -1)

    # (kernel function, number of theta values passed as extra arguments)
    kernel_plan = (
        (additive_chi2_kernel, 0),
        (chi2_kernel, 1),
        (cosine_similarity, 0),
        (linear_kernel, 0),
        (polynomial_kernel, 3),
        (rbf_kernel, 1),
        (laplacian_kernel, 1),
        (sigmoid_kernel, 1),
    )

    kernelResult = 0
    index = 0
    for kernel_fn, n_params in kernel_plan:
        for _ in range(3):
            extra = theta[index + 1:index + 1 + n_params]
            kernelResult += theta[index] * kernel_fn(x, y, *extra)
            index += 1 + n_params

    return kernelResult
Exemplo n.º 24
0
def chi2Vector(imgG, imgQ, windowSize):
    """Compare two images block-by-block with the chi-squared kernel.

    Both images are run through ``ce.horizontalS2`` / ``ce.verticalS2`` and
    ``HoGLBP``. The first 1200 entries of each ``"HoG"`` vector are compared
    in 150 blocks of 8, the remainder in 30 blocks of 10.

    Returns a dict with ``"chi2_vector"`` (the 180 kernel values as a list)
    and ``"combined_image"`` (the stacked surrounding-point images).
    """
    def directional_maps(img):
        # Horizontal then vertical map over the full image extent.
        height, width = img.shape[0], img.shape[1]
        return [ce.horizontalS2(img, 0, width, 0, height, 4),
                ce.verticalS2(img, 0, width, 0, height, 4)]

    def blockwise_chi2(first, second, width, count):
        # Kernel value between matching width-sized slices of both vectors.
        return [
            chi2_kernel([first[k * width:(k + 1) * width]],
                        [second[k * width:(k + 1) * width]]).ravel()[0]
            for k in range(count)
        ]

    def stacked_points(key):
        # Join each image's parts side by side, then stack G above Q.
        joined_g = np.concatenate(histG[key], axis=1)
        joined_q = np.concatenate(histQ[key], axis=1)
        return np.concatenate((joined_g, joined_q), axis=0)

    maps_g = directional_maps(imgG)
    maps_q = directional_maps(imgQ)

    histG = HoGLBP(maps_g, imgG, windowSize)
    histQ = HoGLBP(maps_q, imgQ, windowSize)

    # Both feature groups are sliced out of the "HoG" entry.
    fFHoG = histG["HoG"][:1200]
    fFLBP = histG["HoG"][1200:]
    sFHoG = histQ["HoG"][:1200]
    sFLBP = histQ["HoG"][1200:]

    HoG_chi2 = blockwise_chi2(fFHoG, sFHoG, 8, 150)
    LBP_chi2 = blockwise_chi2(fFLBP, sFLBP, 10, 30)

    tH = stacked_points("imageSurroundingPointsH")
    tV = stacked_points("imageSurroundingPointsV")
    tT = np.concatenate((tH, tV), axis=0)

    return {
        "chi2_vector": np.concatenate((HoG_chi2, LBP_chi2), axis=0).tolist(),
        "combined_image": tT
    }
Exemplo n.º 25
0
def chi_kernel(x1,x2,sigma=0.5):
    """Return the chi-squared kernel between ``x1`` and ``x2``.

    ``sigma`` is forwarded to ``chi2_kernel`` as its ``gamma`` argument.
    """
    return chi2_kernel(x1, x2, gamma=sigma)
Exemplo n.º 26
0
def tracks_2_kernel(tracks_X, fields, tracks_Y=None, gamma=1.0, weights=None):
    """
    Compute a weighted sum of chi-squared kernels over trajectory fields.

    Parameters
    ----------
    tracks_X : structured array
    tracks_Y : structured array or None
        When None, ``tracks_X`` is used for both sides.
    gamma : float
        Passed to ``pairwise.chi2_kernel`` for every field.
    fields : sequence of string
        Fields from trajectories to use for building kernel.
    weights : sequence of float or None
        Weights for fields. None or an empty sequence selects equal weights
        that sum to one.

    Returns
    -------
    K : ndarray of shape (len(tracks_X), len(tracks_Y))
        Sum over fields of ``weight * chi2_kernel(X[field], Y[field])``.
    """
    # Test for "not supplied" explicitly: ``not weights`` raises ValueError
    # for a NumPy array holding more than one element.
    if weights is None or len(weights) == 0:
        weights = np.ones(shape=(len(fields), ))
        weights = weights / np.sum(weights)
    if tracks_Y is None:
        tracks_Y = tracks_X

    K = np.zeros(shape=(len(tracks_X), len(tracks_Y)))
    print(humansize(K.nbytes))

    for name, weight in zip(fields, weights):
        print('Kernel {} - {}'.format(K.shape, name))
        X = tracks_X[name]
        Y = tracks_Y[name]

        if name == 'trajectory':
            # Flatten each trajectory to one row and rescale from [-1, 1]
            # to [0, 1]; the kernel is applied to the rescaled values.
            X = X.reshape((len(X), -1))
            Y = Y.reshape((len(Y), -1))
            old_min, old_max = -1, 1
            new_min, new_max = 0, 1
            X = ((X - old_min) /
                 (old_max - old_min)) * (new_max - new_min) + new_min
            Y = ((Y - old_min) /
                 (old_max - old_min)) * (new_max - new_min) + new_min

        chi2 = pairwise.chi2_kernel(X, Y, gamma=gamma)
        K += weight * chi2
    return K
def test_nystroem_default_parameters():
    """Check that Nystroem's default gamma matches the kernels' defaults.

    With as many components as samples, the Gram matrix of the transformed
    data is compared with the kernel evaluated directly on the data.
    """
    rnd = np.random.RandomState(42)
    X = rnd.uniform(size=(10, 4))

    def approximated_gram(transformer):
        # Gram matrix of the data after the Nystroem feature map.
        mapped = transformer.fit_transform(X)
        return np.dot(mapped, mapped.T)

    # rbf kernel should behave as gamma=None by default
    # aka gamma = 1 / n_features
    mapped_gram = approximated_gram(Nystroem(n_components=10))
    assert_array_almost_equal(rbf_kernel(X, gamma=None), mapped_gram)

    # chi2 kernel should behave as gamma=1 by default
    mapped_gram = approximated_gram(Nystroem(kernel='chi2', n_components=10))
    assert_array_almost_equal(chi2_kernel(X, gamma=1), mapped_gram)
Exemplo n.º 28
0
def test_nystroem_default_parameters():
    """Check that Nystroem's default gamma matches the kernels' defaults.

    With as many components as samples, the Gram matrix of the transformed
    data is compared with the kernel evaluated directly on the data.
    """
    rnd = np.random.RandomState(42)
    X = rnd.uniform(size=(10, 4))

    def approximated_gram(transformer):
        # Gram matrix of the data after the Nystroem feature map.
        mapped = transformer.fit_transform(X)
        return np.dot(mapped, mapped.T)

    # rbf kernel should behave as gamma=None by default
    # aka gamma = 1 / n_features
    mapped_gram = approximated_gram(Nystroem(n_components=10))
    assert_array_almost_equal(rbf_kernel(X, gamma=None), mapped_gram)

    # chi2 kernel should behave as gamma=1 by default
    mapped_gram = approximated_gram(Nystroem(kernel='chi2', n_components=10))
    assert_array_almost_equal(chi2_kernel(X, gamma=1), mapped_gram)
Exemplo n.º 29
0
def test_1():
    """Load the stored embeddings and print a chi-squared Gram matrix.

    One embedding set is shuffled, trimmed to an even length and split in
    half; the kernel (gamma 0.5) of the first half with itself is printed.
    """
    # All four files are loaded; only emb2 and emb_lfw are used below.
    emb1 = load_embeddings("embeddings_elias.pkl")
    emb2 = load_embeddings("embeddings_matthias.pkl")
    emb3 = load_embeddings("embeddings_laia.pkl")
    emb_lfw = load_embeddings("embeddings_lfw.pkl")

    # prepare ds
    np.random.shuffle(emb2)
    if len(emb2) % 2 != 0:
        emb2 = emb2[:-1]

    split_set = np.array_split(emb2, 2)

    X_train = split_set[0]
    # The test half and the outlier set are prepared but not used yet.
    X_test = split_set[1]
    X_outliers = emb_lfw

    K = chi2_kernel(X_train, gamma=.5)
    # Function-call form: the bare print statement is a syntax error on
    # Python 3, while this form behaves the same on Python 2.
    print(K)
Exemplo n.º 30
0
 def _kernel(self, X, Y=None):
     """Return the kernel matrix between ``X`` and ``Y``.

     ``self.kernel`` names the kernel; ``self.gamma``, ``self.degree`` and
     ``self.coef0`` are forwarded to the kernels that take them. An
     unrecognised name yields ``None``.
     """
     name = self.kernel
     if name == 'chi2':
         return chi2_kernel(X, Y, gamma=self.gamma)
     if name == 'laplacian':
         return laplacian_kernel(X, Y, gamma=self.gamma)
     if name == 'linear':
         return linear_kernel(X, Y)
     if name == 'polynomial':
         return polynomial_kernel(
             X, Y, degree=self.degree, gamma=self.gamma, coef0=self.coef0)
     if name == 'rbf':
         return rbf_kernel(X, Y, gamma=self.gamma)
     if name == 'sigmoid':
         return sigmoid_kernel(X, Y, gamma=self.gamma, coef0=self.coef0)
     return None
Exemplo n.º 31
0
def grid_cv(X, y, k=5):
    """Grid-search the SVM ``C`` on a precomputed chi-squared kernel.

    The chi-squared Gram matrix of ``X`` is computed once and a
    precomputed-kernel SVC is searched over five values of ``C`` with
    ``k``-fold cross validation.

    Returns ``(best_params, best_score)`` where ``best_params`` is the full
    parameter dict of the best pipeline (``C`` is under ``'clf__C'``).
    """
    gram = chi2_kernel(X)

    search = GridSearchCV(
        Pipeline([('clf', SVC(kernel='precomputed'))]),
        {'clf__C': (1e-2, 1e-1, 1, 1e+1, 1e+2)},
        n_jobs=1,
        verbose=0,
        cv=k,
    )
    search.fit(gram, y)

    return search.best_estimator_.get_params(), search.best_score_
Exemplo n.º 32
0
    def coding_unified(self, codebook_, feats_):
        """Encode features against a codebook with hard or soft assignment.

        Works on copies of both inputs. ``self.coding_poling`` selects the
        mode: if it contains ``'hard'``, each feature row is assigned to the
        codeword with the smallest cosine distance (one-hot integer output);
        if it contains ``'soft'``, each row gets chi-squared kernel
        responses to every codeword (gamma ``1 / (2 * self.variance)``),
        normalised to sum to one. Before comparison each row of features and
        codebook is shifted so that its minimum is zero.

        Returns an array of shape ``feats.shape[:2] + (codebook_size,)``.
        Raises ``ValueError`` for any other coding mode.
        """
        feats = feats_.copy()
        codebook = codebook_.copy()

        if self.debug:
            # Function-call form works on both Python 2 and Python 3.
            print('\t- coding features ...')
            sys.stdout.flush()

        if 'hard' in self.coding_poling:

            # The np.int alias was removed from NumPy; the builtin int is
            # the type it referred to.
            coded_feats = np.zeros((feats.shape[:2] + (self.codebook_size,)), dtype=int)

            feats = feats.reshape(feats.shape[0], feats.shape[1], -1)
            idxs_cuboid = np.arange(feats.shape[1])

            codebook -= codebook.min(axis=1).reshape(-1, 1)

            for sample in range(feats.shape[0]):
                feats[sample] -= feats[sample].min(axis=1).reshape(-1, 1)
                idxs = np.argmin(pairwise_distances(feats[sample], codebook, metric="cosine"), axis=1)
                coded_feats[sample, idxs_cuboid, idxs] = 1

        elif 'soft' in self.coding_poling:

            # Likewise np.float is gone; the builtin float is equivalent.
            coded_feats = np.zeros((feats.shape[:2] + (self.codebook_size,)), dtype=float)
            beta = 1.0 / (2.0 * self.variance)
            codebook -= codebook.min(axis=1).reshape(-1, 1)

            for sample in range(feats.shape[0]):
                feats[sample] -= feats[sample].min(axis=1).reshape(-1, 1)
                coded_feats[sample] = chi2_kernel(feats[sample], codebook, gamma=beta)

                # Guard against division by zero for all-zero rows.
                cfnorm = coded_feats[sample].sum(axis=1).reshape(-1, 1)
                cfnorm[cfnorm == 0] = 1.
                coded_feats[sample] /= cfnorm

        else:
            raise ValueError('Coding method not implemented')

        return coded_feats
Exemplo n.º 33
0
def predict_features(train_data, test_data, test_labels, clfs):
    """Return class probabilities for ``test_data``.

    ``clfs`` is unpacked into ``(svm_clf, lr_clf)``. The chi-squared kernel
    between the test and training features is fed to the SVM's
    ``decision_function``; the resulting scores, reshaped to a single
    column, are passed to ``lr_clf.predict_proba``.

    When ``ARGS.verbose`` is set, the accuracy of ``lr_clf.predict`` against
    ``test_labels`` is printed as well.
    """
    svm_clf, lr_clf = clfs

    # Kernel of every test sample against every training sample.
    test_vs_train = chi2_kernel(test_data, train_data)

    # One decision-function score per test sample, as a column vector.
    scores = svm_clf.decision_function(test_vs_train).reshape(-1, 1)

    # Map the SVM scores to probabilities.
    probabilities = lr_clf.predict_proba(scores)

    if ARGS.verbose:
        predicted = lr_clf.predict(scores)
        print("LR test acc:", np.mean(predicted == test_labels))

    return probabilities
Exemplo n.º 34
0
def get_kernel_matrix(X1, X2=None, kernel='rbf',gamma = 1, degree = 3, coef0=1):
    """Build the kernel matrix between ``X1`` and ``X2`` for a named kernel.

    ``kernel`` selects one of the ``pairwise`` kernel functions: ``'rbf'``,
    ``'poly'``, ``'linear'``, ``'laplacian'``, ``'chi2'``,
    ``'additive_chi2'`` or ``'sigmoid'``. ``gamma``, ``degree`` and
    ``coef0`` are forwarded only to the kernels that take them.

    For any other ``kernel`` value an error line is printed and ``None`` is
    returned.
    """
    if kernel == 'rbf':
        return pairwise.rbf_kernel(X1, X2, gamma=gamma)
    if kernel == 'poly':
        return pairwise.polynomial_kernel(
            X1, X2, degree=degree, gamma=gamma, coef0=coef0)
    if kernel == 'linear':
        return pairwise.linear_kernel(X1, X2)
    if kernel == 'laplacian':
        return pairwise.laplacian_kernel(X1, X2, gamma=gamma)
    if kernel == 'chi2':
        return pairwise.chi2_kernel(X1, X2, gamma=gamma)
    if kernel == 'additive_chi2':
        return pairwise.additive_chi2_kernel(X1, X2)
    if kernel == 'sigmoid':
        return pairwise.sigmoid_kernel(X1, X2, gamma=gamma, coef0=coef0)

    print('[Error] Unknown kernel')
    return None
Exemplo n.º 35
0
from sklearn.svm import SVC
from sklearn.metrics.pairwise import chi2_kernel
'''
The chi-squared kernel is a very popular choice for
training non-linear SVMs in computer vision applications.
'''

# Toy data: four two-dimensional samples with binary labels.
X = [[0, 1], [1, 0], [.2, .8], [.7, .3]]
y = [0, 1, 0, 1]

# Usage 1: precompute the kernel matrix and hand it to an SVC configured
# with kernel="precomputed"; prediction then also takes a kernel matrix.
K = chi2_kernel(X, gamma=.5)
svm = SVC(kernel='precomputed')
svm.fit(K, y)
svm.predict(K)

# Usage 2: pass the kernel function itself as the kernel argument; the SVC
# then calls chi2_kernel on the raw samples (no gamma is supplied here).
svm = SVC(kernel=chi2_kernel)
svm.fit(X, y)
svm.predict(X)
Exemplo n.º 36
0
    def coding_class_based(self, codebook_pos_, codebook_neg_, feats_):
        """Encode features against a positive and a negative codebook.

        All inputs are copied, so the caller's arrays are never modified.
        Every feature vector and every codeword is shifted by its own
        minimum before comparison, which makes them non-negative (the
        chi-squared kernel rejects negative values).

        The mode is selected by ``self.coding_poling``:

        * contains ``'hard'``: one-hot code marking the codeword, across
          both codebooks, with the smallest cosine distance;
        * contains ``'soft'``: chi-squared kernel responses
          (``gamma = 1 / (2 * self.variance)``), normalised to sum to 1
          separately within each codebook.

        ``'hard'`` takes precedence when both substrings are present. In
        the output the negative codebook occupies the first block of
        columns and the positive codebook the second.

        Args:
            codebook_pos_: 2-D array, one positive-class codeword per row.
            codebook_neg_: 2-D array, one negative-class codeword per row.
                Both codebooks are assumed to have ``self.codebook_size``
                rows -- TODO confirm against caller.
            feats_: array whose first two axes index (sample, cuboid); any
                trailing axes are flattened into one feature axis.

        Returns:
            Array of shape ``feats_.shape[:2] + (2 * self.codebook_size,)``;
            integer dtype for hard coding, float for soft coding.

        Raises:
            ValueError: if ``self.coding_poling`` names neither mode.
        """
        feats = feats_.copy()
        codebook_pos = codebook_pos_.copy()
        codebook_neg = codebook_neg_.copy()

        if self.debug:
            print('\t- coding features ...')
            sys.stdout.flush()

        hard = 'hard' in self.coding_poling
        if not hard and 'soft' not in self.coding_poling:
            raise ValueError('Coding method not implemented')

        # Single-argument form prints the same text under Python 2 and 3.
        print("\t- feats.shape " + str(feats.shape))

        n_samples, n_cuboids = feats.shape[:2]
        out_shape = (n_samples, n_cuboids, self.codebook_size + self.codebook_size)

        # Flatten trailing axes so each cuboid is one feature vector.
        feats = feats.reshape(n_samples, n_cuboids, -1)

        # Shift every vector / codeword by its own minimum.
        feats -= feats.min(axis=2, keepdims=True)
        codebook_pos -= codebook_pos.min(axis=1).reshape(-1, 1)
        codebook_neg -= codebook_neg.min(axis=1).reshape(-1, 1)

        if hard:
            # ``np.int`` was removed in NumPy 1.24; the builtin is equivalent.
            coded_feats = np.zeros(out_shape, dtype=int)
            cuboid_idxs = np.arange(n_cuboids)

            for sample in range(n_samples):
                dists_pos = pairwise_distances(feats[sample], codebook_pos, metric="cosine")
                dists_neg = pairwise_distances(feats[sample], codebook_neg, metric="cosine")

                # Negative codewords first, then positive ones.
                dists = np.hstack((dists_neg, dists_pos))
                coded_feats[sample, cuboid_idxs, np.argmin(dists, axis=1)] = 1
        else:
            coded_feats = np.zeros(out_shape, dtype=float)
            beta = 1.0 / (2.0 * self.variance)

            for sample in range(n_samples):
                halves = []
                for codebook in (codebook_neg, codebook_pos):
                    resp = chi2_kernel(feats[sample], codebook, gamma=beta)

                    # Avoid dividing by zero for vectors with no response.
                    cfnorm = resp.sum(axis=1).reshape(-1, 1)
                    cfnorm[cfnorm == 0] = 1.
                    resp /= cfnorm
                    halves.append(resp)

                coded_feats[sample] = np.hstack(halves)

        return coded_feats
Exemplo n.º 37
0
for n in pd.DataFrame(ipos['Lead Mgr'].unique(),
columns=['Name']).sort_values('Name')['Name']:                            
service_args=['--ignore-ssl-errors=true'])
driver.implicitly_wait(20)
driver.get(url)          
driver.save_screenshot(r'flight_explorer.png')
   import gspread
from oauth2client.client import SignedJwtAssertionCredentials
json_key = json.load(open(r'/PATH_TO_KEY/KEY.json'))
scope = ['https://spreadsheets.google.com/feeds']
credentials = SignedJwtAssertionCredentials(json_key['client_email'],
json_key['private_key'].encode(), scope)
gc = gspread.authorize(credenti                                          
 
  from sklearn.metrics.pairwise import chi2_kernel
k_sim = chi2_kernel(X[0].reshape(1,-1), X)
kf = pd.DataFrame(k_sim).T
kf.columns = ['similarity']
kf.sort_values('similarity', ascending=False)
   import sys
import pandas as pd
import numpy as np
import requests
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import
DesiredCapabilities
from selenium.webdriver.common.by import By                    
   from flask import Flask, request, redirect
import twilio.twiml
import pandas as pd
import re
Exemplo n.º 38
0
from emd import emd
import numpy as np
from sklearn.metrics.pairwise import chi2_kernel
import code


# Read the comma-separated table as strings, drop the first column
# (presumably the names -- confirm against the data file) and convert the
# remaining columns to floats.
l = np.genfromtxt('./names_and_counts.txt', dtype=str, delimiter=',')
l = l[:, 1:]
l = l.astype(float)

# Normalise every row to sum to 1. A row that sums to zero would turn into
# NaNs (0/0), which chi2_kernel rejects, so such rows are divided by 1 and
# stay all-zero instead.
row_sums = l.sum(axis=1)
row_sums[row_sums == 0] = 1.
new_matrix = l / row_sums[:, np.newaxis]
xx, yy = new_matrix.shape

# Pairwise chi-squared kernel between all rows.
simMatrix = chi2_kernel(new_matrix)

# NOTE(review): this opens an interactive console and blocks until it is
# exited; the CSV below is only written afterwards. Looks like a debugging
# aid -- confirm whether it should stay.
code.interact(local=locals())

np.savetxt("chi_sim.csv", simMatrix, delimiter=",")
Exemplo n.º 39
0
def compute_kernel(X):
    """Compute and return the chi-squared kernel matrix of ``X`` with itself.

    Progress messages are printed before and after the computation.

    Args:
        X: samples accepted by ``chi2_kernel`` (one sample per row).

    Returns:
        The kernel matrix produced by ``chi2_kernel(X)``. Previously the
        matrix was computed and then dropped, so the function always
        returned ``None``.
    """
    print('computing kernel...')

    K = chi2_kernel(X)

    print('precomputed kernel')
    return K
Exemplo n.º 40
0
def _encode_bag_of_words(kmodel, images, num_words):
    """Return one visual-word histogram (list of ``num_words`` counts) per image."""
    histograms = []
    for image in images:
        word_ids = kmodel.predict(numpy.asarray(image))
        histograms.append(
            numpy.bincount(word_ids, minlength=num_words).tolist())
    return histograms


def _chi2_distance_stats(histograms):
    """Return ``(total, pairs)``: summed chi-squared terms over all unordered pairs."""
    data = numpy.asarray(histograms)
    total = 0
    pairs = 0
    for i in range(len(data) - 1):
        current = data[i]
        rest = data[i + 1:]
        # Bins that are empty in both histograms give 0/0; they contribute 0.
        with numpy.errstate(divide='ignore', invalid='ignore'):
            z = numpy.true_divide((rest - current) ** 2, rest + current)
            z[z == numpy.inf] = 0
            z = numpy.nan_to_num(z)
        total += z.sum()
        pairs += len(rest)
    return total, pairs


def _count_errors(expected, predicted):
    """Return the number of positions where ``expected`` and ``predicted`` differ."""
    return sum(1 for want, got in zip(expected, predicted) if want != got)


def main(top_folder, image_folder):
    """Train and evaluate a chi-squared-kernel SVM on HSV bag-of-words features.

    Steps, each reported on stdout with timings:

    1. Load labels and the train/validation/test split from
       ``<top_folder>/<top_folder>imagelabels.mat`` and
       ``<top_folder>/<top_folder>datasplits.mat``. Only the first split
       (``trn1`` / ``val1`` / ``tst1``) is used.
    2. Load every ``<top_folder><image_folder>/image_*.jpg``, convert its
       pixels with ``hsv_image_calculate`` and write ``image_labels.txt``.
    3. For vocabulary sizes 200..1000 (step 100): cluster a random sample
       of training pixels, encode the images as cluster histograms, fit an
       SVC on the precomputed chi-squared kernel and count validation
       errors. The size with the fewest errors wins (ties go to the larger
       size).
    4. Refit on training + validation data with the best size, evaluate on
       the test set and write ``svm_results.txt``.

    Args:
        top_folder: dataset folder name; also the prefix of the ``.mat``
            file names and of the image folder path.
        image_folder: string appended to ``top_folder`` to locate images.
    """
    init_time = timeit.default_timer()

    # ---------------------------------------------------------------- labels
    print('Getting Labels  --------------------------------------------------')
    mat = loadmat(top_folder + '/' + top_folder +'imagelabels.mat')
    labels = mat['labels'][0].tolist()
    print('Done')
    print('------------------------------------------------------------------\n')

    # ----------------------------------------------------------- data splits
    print('Getting Info on Train, Val, Test Data   --------------------------')
    mat = loadmat(top_folder + '/' + top_folder + 'datasplits.mat')
    trn = [mat['trn1'][0].tolist(), mat['trn2'][0].tolist(), mat['trn3'][0].tolist()]
    tst = [mat['tst1'][0].tolist(), mat['tst2'][0].tolist(), mat['tst3'][0].tolist()]
    val = [mat['val1'][0].tolist(), mat['val2'][0].tolist(), mat['val3'][0].tolist()]
    print('Done')
    print('------------------------------------------------------------------\n')

    # Only the first split is used; sets make the per-image lookup O(1).
    trn_ids = set(trn[0])
    val_ids = set(val[0])
    tst_ids = set(tst[0])

    # ----------------------------------------------------------- image names
    print('Getting Images Names  --------------------------------------------')
    # Training model on 32X32 images
    imagefiles = sorted(glob(top_folder + image_folder + '/image_*.jpg'))
    print('Done')
    print('------------------------------------------------------------------\n')

    with open('image_labels.txt','w') as fout_test:
        for ind, element in enumerate(imagefiles):
            fout_test.write(str(labels[ind]) + ' ' + element + '\n')

    # ------------------------------------------------ load + split the images
    print('Getting Images H,S,V  --------------------------------------------')
    print('and Seperating Traning, Validation, and Test Data  ---------------')
    trn_hsv_images = []
    val_hsv_images = []
    tst_hsv_images = []

    trn_labels = []
    val_labels = []
    tst_labels = []

    for idx, imagefile in enumerate(imagefiles):
        img = Image.open(imagefile)
        img.load()

        # Get tuple of r,g,b values for each pixel
        rgb_image = list(img.getdata())

        # transform r,g,b to h,s,v
        hsv_image = hsv_image_calculate(rgb_image)

        # Split ids are 1-based, hence idx + 1.
        image_id = idx + 1
        if image_id in trn_ids:
            trn_hsv_images.append(hsv_image)
            trn_labels.append(labels[idx])
        elif image_id in val_ids:
            val_hsv_images.append(hsv_image)
            val_labels.append(labels[idx])
        elif image_id in tst_ids:
            tst_hsv_images.append(hsv_image)
            tst_labels.append(labels[idx])
        else:
            # Image belongs to none of the three sets; report its index.
            print(idx)

    print('# of training images: ' + str(len(trn_hsv_images)))
    print('# of validation images: ' + str(len(val_hsv_images)))
    print('# of test images: ' + str(len(tst_hsv_images)))
    print('')
    print('# of traning labels:' + str(len(trn_labels)))
    print('# of validation labels:' + str(len(val_labels)))
    print('# of test labels:' + str(len(tst_labels)))
    print('------------------------------------------------------------------\n')

    # ----------------------------------------- sample pixels for the codebook
    print('Getting H,S,V Features from Training Set  ------------------------')
    start_time = timeit.default_timer()
    hsv_features = []
    for image in trn_hsv_images:
        # randrange(0, 4) == 1 keeps each image with probability 1/4.
        if random.randrange(0,4,1) == 1:
            hsv_features.extend(image)
    hsv_features = numpy.asarray(hsv_features)
    print('Feature matrix: ' + str(hsv_features.shape))
    print('Elapsed Time: ' + str(timeit.default_timer() - start_time))
    print('------------------------------------------------------------------\n')

    # ------------------------------------------------ vocabulary-size search
    print('Training and Validation  ----------------------------------------')
    training_time = timeit.default_timer()
    minError = -1
    bestWords = -1
    for numWords in range(200, 1001, 100):
        start_time2 = timeit.default_timer()

        print(str(numWords) + ' Word Vocabulary  ---------------------------------------------')

        print('Clustering Features...')
        start_time3 = timeit.default_timer()
        kmodel = kmeans_cluster(numWords, hsv_features)
        print('Elapsed Time: ' + str(timeit.default_timer() - start_time3) + '\n')

        print('Hot Encoding Training Data...')
        start_time3 = timeit.default_timer()
        trn_hsv_hot_vectors = _encode_bag_of_words(kmodel, trn_hsv_images, numWords)
        print('Elapsed Time: ' + str(timeit.default_timer() - start_time3) + '\n')

        print('Calculate uf...')
        start_time3 = timeit.default_timer()
        sum_chi_distance, count = _chi2_distance_stats(trn_hsv_hot_vectors)
        # NOTE(review): the final fit below uses count/sum (no factor 2);
        # confirm which scaling of gamma is intended.
        uf = 2*count/sum_chi_distance
        print('uf = ' + str(uf))
        print('Elapsed Time: ' + str(timeit.default_timer() - start_time3) + '\n')

        print('Computing Training Kernel...')
        start_time3 = timeit.default_timer()
        K = chi2_kernel(numpy.asarray(trn_hsv_hot_vectors), gamma=uf)
        print('Size of kernel: ' + str(K.shape))
        print('Elapsed Time: ' + str(timeit.default_timer() - start_time3) + '\n')

        print('Applying Kernel to SVM...')
        start_time3 = timeit.default_timer()
        svm = SVC(kernel='precomputed').fit(K,numpy.asarray(trn_labels))
        trn_predict = svm.predict(K).tolist()
        trn_error = _count_errors(trn_labels, trn_predict)
        print('Error on training data: ' + str(trn_error) + '/' + str(len(trn_labels)))
        print('Elapsed Time: ' + str(timeit.default_timer() - start_time3) + '\n')

        print('Hot Encoding Validation Data...')
        start_time3 = timeit.default_timer()
        val_hsv_hot_vectors = _encode_bag_of_words(kmodel, val_hsv_images, numWords)
        print('Elapsed Time: ' + str(timeit.default_timer() - start_time3) + '\n')

        print('Computing Validation Kernel...')
        start_time3 = timeit.default_timer()
        K = chi2_kernel(X=numpy.asarray(val_hsv_hot_vectors), Y=numpy.asarray(trn_hsv_hot_vectors), gamma=uf)
        print('Size of kernel: ' + str(K.shape))
        print('Elapsed Time: ' + str(timeit.default_timer() - start_time3) + '\n')

        print('Predicting Values for Validation Data...')
        start_time3 = timeit.default_timer()
        val_predict = svm.predict(K)
        print('Prediction array size: ' + str(val_predict.shape))
        print('Elapsed Time: ' + str(timeit.default_timer() - start_time3) + '\n')

        print('Calculating Error...')
        start_time3 = timeit.default_timer()
        val_predict = val_predict.tolist()
        error = _count_errors(val_labels, val_predict)
        print('Error = ' + str(error) + '/' + str(len(val_labels)))
        print('Elapsed Time: ' + str(timeit.default_timer() - start_time3) + '\n')

        # "<=" means a tie is won by the larger vocabulary.
        if minError == -1 or error <= minError:
            minError = error
            bestWords = numWords

        print('------------------------------------------------------------------')
        print('Elapsed Time for ' + str(numWords) + ' words: ' + str(timeit.default_timer() - start_time2))
        print('------------------------------------------------------------------\n')

    print('Training elapsed Time: ' + str(timeit.default_timer() - training_time))
    print('Best number of words is ' + str(bestWords))
    print('Error = ' + str(minError)+ '\n')
    print('------------------------------------------------------------------\n')

    # ------------------------------------- final fit on train + val, then test
    print('Testing  --------------------------------------------------------')
    testing_time = timeit.default_timer()

    fit_images = trn_hsv_images + val_hsv_images
    fit_labels = trn_labels + val_labels

    print('Getting H,S,V Features from Training and Validation Set...')
    start_time = timeit.default_timer()
    hsv_features = []
    for image in fit_images:
        # getting features from 1/4 of the training and validation images
        if random.randrange(0,4,1) == 1:
            hsv_features.extend(image)

    hsv_features = numpy.asarray(hsv_features)
    print('Feature matrix: ' + str(hsv_features.shape))
    print('Elapsed Time: ' + str(timeit.default_timer() - start_time) + '\n')

    print('Clustering Features...')
    start_time = timeit.default_timer()
    kmodel = kmeans_cluster(bestWords, hsv_features)
    print('Elapsed Time: ' + str(timeit.default_timer() - start_time) + '\n')

    print('Hot Encoding Training Data...')
    start_time = timeit.default_timer()
    trn_hsv_hot_vectors = _encode_bag_of_words(kmodel, fit_images, bestWords)
    print('Elapsed Time: ' + str(timeit.default_timer() - start_time) + '\n')

    print('Calculate uf...')
    start_time = timeit.default_timer()
    sum_chi_distance, count = _chi2_distance_stats(trn_hsv_hot_vectors)
    # NOTE(review): the validation loop above uses 2*count/sum; confirm
    # which scaling of gamma is intended.
    uf = count/sum_chi_distance
    print('uf = ' + str(uf))
    print('Elapsed Time: ' + str(timeit.default_timer() - start_time) + '\n')

    print('Computing Training Kernel...')
    start_time = timeit.default_timer()
    K = chi2_kernel(numpy.asarray(trn_hsv_hot_vectors), gamma=uf)
    print('Size of kernel: ' + str(K.shape))
    print('Elapsed Time: ' + str(timeit.default_timer() - start_time) + '\n')

    print('Applying Kernel to SVM...')
    start_time = timeit.default_timer()
    svm = SVC(kernel='precomputed').fit(K,numpy.asarray(fit_labels))
    trn_predict = svm.predict(K).tolist()
    trn_error = _count_errors(fit_labels, trn_predict)
    # The denominator is the number of samples actually fitted; it used to
    # count the validation labels twice.
    print('Error on training data: ' + str(trn_error) + '/' + str(len(fit_labels)))
    print('Elapsed Time: ' + str(timeit.default_timer() - start_time) + '\n')

    print('Hot Encoding Testing Data...')
    start_time = timeit.default_timer()
    tst_hsv_hot_vectors = _encode_bag_of_words(kmodel, tst_hsv_images, bestWords)
    print('Elapsed Time: ' + str(timeit.default_timer() - start_time) + '\n')

    print('Computing Testing Kernel...')
    start_time = timeit.default_timer()
    K = chi2_kernel(X=numpy.asarray(tst_hsv_hot_vectors), Y=numpy.asarray(trn_hsv_hot_vectors), gamma=uf)
    print('Size of kernel: ' + str(K.shape))
    print('Elapsed Time: ' + str(timeit.default_timer() - start_time) + '\n')

    print('Predicting Values for Testing Data...')
    start_time = timeit.default_timer()
    tst_predict = svm.predict(K)
    print('Prediction array size: ' + str(tst_predict.shape))
    print('Elapsed Time: ' + str(timeit.default_timer() - start_time) + '\n')

    print('Calculating Error...')
    start_time3 = timeit.default_timer()
    tst_predict = tst_predict.tolist()
    error = _count_errors(tst_labels, tst_predict)
    print('Error = ' + str(error) + '/' + str(len(tst_labels)))
    print('Elapsed Time: ' + str(timeit.default_timer() - start_time3) + '\n')

    print('Testing Elapsed Time: ' + str(timeit.default_timer() - testing_time) + '\n')
    # Elapsed time is "now - start"; the operands used to be swapped, which
    # produced a negative duration.
    full_time = timeit.default_timer() - init_time

    print('Writing to file...')
    with open('svm_results.txt','w') as fout:
        fout.write('Execution Time: ' + str(full_time) + '\n')
        fout.write('Minimum Validation Error: ' + str(minError) + '/' + str(len(val_labels)) + '\n')
        fout.write('Testing Error: ' + str(error) + '/' + str(len(tst_labels)) + '\n\n')
        fout.write('Real:   Predict:' + '\n')

        for real, predicted in zip(tst_labels, tst_predict):
            fout.write(str(real) + '    ' + str(predicted))
            if real == predicted:
                fout.write('    X')
            fout.write('\n------------------------------\n')
Exemplo n.º 41
0
 def __call__(self, x, y):
     """Return the chi-squared kernel of ``x`` and ``y`` using ``self.gamma``."""
     kernel_values = chi2_kernel(x, y, gamma=self.gamma)
     return kernel_values