def data_normalize_inplace(x, power=0.5, compute_norm_subset=None):
    """Standardize, power-normalize and L2-normalize `x` in place.

    The per-dimension mean and standard deviation are estimated on
    `x[compute_norm_subset]` (or on all of `x` when no subset is given) and
    then applied to the whole of `x`. Afterwards a power normalization and a
    per-vector norm normalization are applied through yael, directly on the
    memory of `x`.

    Parameters
    ----------
    x : array [nr_samples, nr_dimensions]
        Data matrix, modified in place. It is handed to yael via
        `numpy_to_fvec_ref`, so it is presumably expected to be a
        C-contiguous float32 array -- TODO confirm against the yael bindings.
    power : float, optional
        Exponent of the power normalization. `1` skips the step, `0.5` uses
        `yael.fvec_ssqrt`, any other value uses `yael.fvec_spow`.
    compute_norm_subset : index or mask, optional
        Anything accepted by `x[...]`; selects the rows used to estimate the
        mean and standard deviation. `None` means "use every row".

    Returns
    -------
    mu, sigma : arrays
        The mean and standard deviation that were applied. Zero standard
        deviations are replaced by 1 (after emitting a warning) so that the
        division is well defined.
    """
    # Identity test, not `!= None`: the subset is usually a NumPy index/mask
    # array, for which `!= None` is an elementwise comparison whose truth
    # value is ambiguous (raises ValueError for more than one element).
    if compute_norm_subset is not None:
        x_subset = x[compute_norm_subset]
    else:
        x_subset = x

    mu = x_subset.mean(axis=0)
    sigma = (x_subset - mu).std(axis=0)
    if np.min(sigma) == 0:
        warnings.warn("At least one dimension of the data has zero variance.")
        # Leave constant dimensions untouched by the scaling step.
        sigma[sigma == 0] = 1.
    del x_subset

    # In-place standardization of the full matrix.
    x -= mu
    x *= 1. / sigma

    # Power normalization, applied by yael directly on the buffer of x.
    if power == 1:
        pass
    elif power == 0.5:
        yael.fvec_ssqrt(yael.numpy_to_fvec_ref(x), x.size)
    else:
        yael.fvec_spow(yael.numpy_to_fvec_ref(x), x.size, power)

    # Called with d = x.shape[1] and n = x.shape[0]; presumably this scales
    # every row of x to unit L2 norm (squared norm raised to -0.5) -- confirm
    # against the yael documentation.
    yael.fmat_normalize_columns_l2sqr_pow(
        yael.numpy_to_fvec_ref(x), x.shape[1], x.shape[0], -0.5)
    return mu, sigma
def scores_to_probas(scores, A, B):
    """Map decision scores to probabilities with `libsvm_precomputed`.

    Parameters
    ----------
    scores : array
        Decision values; a float32 copy is made before the native call.
    A, B : float
        Parameters forwarded unchanged to
        `libsvm_precomputed.scores_to_probas` (presumably the sigmoid
        parameters estimated by libsvm -- confirm with the wrapper).

    Returns
    -------
    array of float32 with `scores.size` entries, filled by the native call.
    """
    nr_scores = scores.size
    # Output buffer that the native routine writes into.
    out = np.empty(nr_scores, dtype=np.float32)
    # The native routine reads a float buffer, hence the float32 copy.
    scores32 = scores.astype(np.float32)
    libsvm_precomputed.scores_to_probas(
        A,
        B,
        scores32.size,
        yael.numpy_to_fvec_ref(scores32),
        yael.numpy_to_fvec_ref(out))
    return out
def initYaelGmm(self):
    """Create `self.yael_gmm`, a yael `gmm_t` describing the fitted model.

    Reads `n_features`, `n_components`, `means_`, `covars_` and `weights_`
    from the instance and stores them in the fields `d`, `k`, `mu`, `sigma`
    and `w` of a new `yael.gmm_t`.
    """
    gmm = yael.gmm_t()
    self.yael_gmm = gmm

    gmm.d = self.n_features
    gmm.k = self.n_components

    # NOTE(review): mu and w go through numpy_to_fvec_ref while sigma goes
    # through numpy_to_fvec (no `_ref`); presumably the former share memory
    # with the numpy arrays and the latter is a copy -- confirm the intended
    # ownership with the yael bindings.
    gmm.mu = yael.numpy_to_fvec_ref(self.means_)
    gmm.sigma = yael.numpy_to_fvec(self.covars_)
    gmm.w = yael.numpy_to_fvec_ref(self.weights_)
def eval_params(self, params, fold):
    """Train an SVM on one fold and score it with `self.criterion`.

    Parameters
    ----------
    params : dict
        Must contain `'c'` and `'positive_weight'`; the whole dict is also
        passed to `combine_kernels`.
    fold : index into `self.splits`
        Each split holds `(train_index, test_index)` and optionally a third
        element with fold-specific labels.

    Returns
    -------
    Stats
        Object whose `valid_accuracies` is a one-element array holding the
        (slightly penalized) performance on the test part of the fold.
    """
    split = self.splits[fold]
    train_idx, test_idx = split[:2]
    # A third entry overrides the global labels
    # (useful for multiclass optimization).
    labels = split[2] if len(split) == 3 else self.cx

    c = params['c']
    pos_weight = params['positive_weight']

    kernel = combine_kernels(self.Kxx, params)
    dual_coef, bias = libsvm_train(
        kernel, labels, train_idx, c = c, pos_weight = pos_weight)

    # Decision values for the test examples, written by the native helper.
    scores = np.empty(test_idx.size, dtype = np.float32)
    libsvm_precomputed.mul_matvec_subset(
        yael.numpy_to_fvec_ref(kernel),
        kernel.shape[1],
        yael.numpy_to_ivec_ref(train_idx),
        train_idx.size,
        yael.numpy_to_ivec_ref(test_idx),
        test_idx.size,
        yael.numpy_to_dvec_ref(dual_coef),
        yael.numpy_to_fvec_ref(scores))
    scores += bias

    criterion = self.criterion
    if criterion == 'ap':
        perf = average_precision(labels[test_idx], scores)
    elif criterion == 'dcr':
        perf = 1 - compute_dcr(labels[test_idx], scores)
    elif criterion == 'sdcr':
        perf = 1 - surrogate_dcr(labels[test_idx], scores)
    else:
        assert False

    # Microscopic penalizations, only meant to break ties:
    # favor the lowest c ...
    perf -= math.log(c) * 1e-6
    # ... and favor positive_weight == 1.
    perf -= abs(math.log(pos_weight)) * 1e-6

    result = Stats()
    result.valid_accuracies = np.array([perf])
    return result
def descs_to_sstats(xx, gmm):
    """
    Reduces the descriptors to their averaged cluster posteriors.

    Inputs
    ------
    xx: array [nr_descs, nr_dimensions]
        Data matrix containing the descriptors.

    gmm: yael.gmm instance
        Mixture of Gaussian object.

    Output
    ------
    Q_sum: array [nr_clusters, ]
        Posterior probabilities averaged over the descriptors, as float32.

    """
    nr_clusters = gmm.k
    nr_descs = xx.shape[0]
    buf_len = nr_descs * nr_clusters

    # yael fills a native buffer with the posteriors; it is converted to a
    # numpy array and the native buffer is released right after.
    posteriors_buf = fvec_new(buf_len)
    gmm_compute_p(
        nr_descs, numpy_to_fvec_ref(xx), gmm, posteriors_buf, GMM_FLAGS_W)
    posteriors = fvec_to_numpy(posteriors_buf, buf_len)
    posteriors = posteriors.reshape(nr_descs, nr_clusters)
    yael.free(posteriors_buf)

    # Reduce over the descriptors (first axis) and average.
    return np.array(sum(posteriors, 0) / nr_descs, dtype=np.float32)
def descs_to_sstats(xx, gmm):
    """
    Turns a set of descriptors into GMM sufficient statistics.

    Inputs
    ------
    xx: array [nr_descs, nr_dimensions]
        Data matrix containing the descriptors; a single descriptor is
        promoted to a one-row matrix.

    gmm: yael.gmm instance
        Mixture of Gaussian object.

    Output
    ------
    sstats: array [nr_clusters + 2 * nr_clusters * nr_dimensions, ]
        float32 vector holding, in this order, the averaged posterior
        probabilities, the posterior-weighted first moments and the
        posterior-weighted second moments of the descriptors.

    """
    xx = np.atleast_2d(xx)
    nr_descs = xx.shape[0]
    nr_clusters = gmm.k
    nr_dims = gmm.d

    # Posterior probabilities computed by yael into a native buffer.
    buf_len = nr_descs * nr_clusters
    posteriors_buf = fvec_new(buf_len)
    gmm_compute_p(
        nr_descs, numpy_to_fvec_ref(xx), gmm, posteriors_buf, GMM_FLAGS_W)
    posteriors = fvec_to_numpy(posteriors_buf, buf_len)
    posteriors = posteriors.reshape(nr_descs, nr_clusters)  # NxK
    yael.free(posteriors_buf)

    # Layout of the output vector: [ K | K*D | K*D ].
    moments_len = nr_clusters * nr_dims
    first_start = nr_clusters
    second_start = nr_clusters + moments_len
    total_len = nr_clusters + 2 * moments_len

    sstats = np.zeros(total_len, dtype=np.float32)
    sstats[:first_start] = np.sum(posteriors, 0) / nr_descs
    sstats[first_start:second_start] = (
        dot(posteriors.T, xx).flatten() / nr_descs)
    sstats[second_start:total_len] = (
        dot(posteriors.T, xx ** 2).flatten() / nr_descs)
    return sstats
def compute_gmm(data, nr_clusters, nr_iterations, nr_threads, seed, nr_redos):
    """Learn a GMM on `data` with yael's `gmm_learn` and return it.

    `data` is a 2-D array with one sample per row; it is made C-contiguous
    before its buffer is handed to yael. The remaining arguments are
    forwarded to `gmm_learn` unchanged, together with `GMM_FLAGS_W`.
    """
    nr_samples, nr_dims = data.shape
    # yael reads the raw buffer, so make sure the rows are laid out densely.
    contiguous = np.ascontiguousarray(data)
    gmm = gmm_learn(
        nr_dims,
        nr_samples,
        nr_clusters,
        nr_iterations,
        numpy_to_fvec_ref(contiguous),
        nr_threads,
        seed,
        nr_redos,
        GMM_FLAGS_W)
    return gmm
def descs_to_sstats(xx, gmm):
    """
    Computes the GMM sufficient statistics of a set of descriptors.

    Inputs
    ------
    xx: array [nr_descs, nr_dimensions]
        Data matrix containing the descriptors; a 1-D input is treated as a
        single descriptor.

    gmm: yael.gmm instance
        Mixture of Gaussian object.

    Output
    ------
    sstats: array [nr_clusters + 2 * nr_clusters * nr_dimensions, ]
        float32 concatenation of the averaged posterior probabilities, the
        first-order moments and the second-order moments, each weighted by
        the posteriors and divided by the number of descriptors.

    """
    xx = np.atleast_2d(xx)
    n = xx.shape[0]
    k = gmm.k
    d = gmm.d

    # Ask yael for the posteriors, copy them to numpy, free the C buffer.
    q_buf = fvec_new(n * k)
    gmm_compute_p(n, numpy_to_fvec_ref(xx), gmm, q_buf, GMM_FLAGS_W)
    q = fvec_to_numpy(q_buf, n * k).reshape(n, k)  # NxK
    yael.free(q_buf)

    q_t = q.T
    q_sum = np.sum(q, 0) / n                     # K values
    q_xx = dot(q_t, xx).flatten() / n            # K*D values
    q_xx_2 = dot(q_t, xx**2).flatten() / n       # K*D values

    # Fill a preallocated float32 vector block by block.
    sstats = np.zeros(k + 2 * k * d, dtype=np.float32)
    sstats[:k] = q_sum
    sstats[k:k + k * d] = q_xx
    sstats[k + k * d:k + 2 * k * d] = q_xx_2
    return sstats
def gmm_predict_proba(xx, gmm):
    """Return the posteriors of the rows of `xx` under `gmm`.

    The result is an array of shape `(xx.shape[0], gmm.k)` obtained from
    `yael.gmm_compute_p` called with `yael.GMM_FLAGS_W`.
    """
    nr_descs = xx.shape[0]
    nr_clusters = gmm.k
    buf_len = nr_descs * nr_clusters

    # Native output buffer: filled by yael, converted to numpy, then freed.
    posteriors_buf = yael.fvec_new(buf_len)
    yael.gmm_compute_p(
        nr_descs,
        yael.numpy_to_fvec_ref(xx),
        gmm,
        posteriors_buf,
        yael.GMM_FLAGS_W)
    posteriors = yael.fvec_to_numpy(posteriors_buf, buf_len)
    posteriors = posteriors.reshape(nr_descs, nr_clusters)
    yael.free(posteriors_buf)
    return posteriors
def libsvm_train(Kxx, cx, subset, c, pos_weight = 1.0, eps = 1e-3, verbose = 0, probability = 0):
    """Train a two-class SVM on a precomputed kernel through libsvm.

    Parameters
    ----------
    Kxx : square, C-contiguous 2-D array
        Precomputed kernel matrix; passed to the native code as a float
        buffer (presumably float32 -- TODO confirm).
    cx : C-contiguous array
        Labels, compared against +1 and -1 below; passed to the native code
        as an int buffer.
    subset : C-contiguous array of indices into Kxx
        Examples to train on; every index must lie in [0, Kxx.shape[0]).
    c : float
        Value assigned to libsvm's `C` parameter.
    pos_weight : float, optional
        Extra multiplicative factor on the weight of the +1 class.
    eps : float, optional
        Value assigned to libsvm's `eps` parameter.
    verbose : int, optional
        Forwarded to `libsvm_precomputed.svm_set_verbose`.
    probability : int, optional
        Assigned to libsvm's `probability` parameter; when truthy the two
        extra outputs `probA` and `probB` are returned as well.

    Returns
    -------
    (dual_coeffs, bias_term) or (dual_coeffs, bias_term, probA, probB)
        `dual_coeffs` is a float64 array with one entry per element of
        `subset`.

    Raises
    ------
    AssertionError
        If the input checks fail or the native training call does not
        return a positive value.
    """
    # check input
    assert Kxx.shape[0] == Kxx.shape[1] and Kxx.flags.c_contiguous, Kxx.shape
    assert subset.flags.c_contiguous and cx.flags.c_contiguous
    assert np.all(subset < Kxx.shape[0]) and np.all(subset >= 0)

    # set libsvm params
    param = libsvm_precomputed.svm_parameter()
    libsvm_precomputed.svm_param_set_default(param)

    # Per-class weights for the labels -1 and +1. The yael vectors are also
    # bound to local names, which keeps the Python objects referenced for
    # the duration of this function.
    param.nr_weight = 2
    param.weight_label = weight_label = yael.ivec(2)
    weight_label[0] = -1
    weight_label[1] = 1
    param.weight = weights = yael.dvec(2)
    npos = (cx[subset] == 1).sum()
    nneg = (cx[subset] == -1).sum()
    # Each class is weighted by the relative frequency of the other class,
    # so the two classes are balanced; positives get the extra pos_weight.
    weights[0] = 2 * npos / float(npos + nneg)
    weights[1] = 2 * nneg / float(npos + nneg) * pos_weight

    param.C = c
    param.nu = param.p = 0
    param.shrinking = 1
    param.probability = probability
    param.eps = eps

    libsvm_precomputed.svm_set_verbose(verbose)

    # prepare output
    nex = subset.size
    dual_coeffs = np.empty((nex,), dtype = np.float64)
    # Three output slots, read below as: [0] bias, [1] probA, [2] probB.
    bias_out = yael.dvec(3)

    # actual call
    ret = libsvm_precomputed.svm_train_precomputed(
        nex, yael.numpy_to_ivec_ref(subset),
        yael.numpy_to_ivec_ref(cx),
        yael.numpy_to_fvec_ref(Kxx), Kxx.shape[1],
        param,
        yael.numpy_to_dvec_ref(dual_coeffs),
        bias_out)

    assert ret > 0

    bias_term = bias_out[0]
    #print dual_coeffs, bias_term

    if probability:
        probA = bias_out[1]
        probB = bias_out[2]
        return dual_coeffs, bias_term, probA, probB
    else:
        return dual_coeffs, bias_term
def fit(self, X):
    """Learn the mixture on `X` with yael and store it as numpy arrays.

    `X` is a 2-D array with one sample per row. After the call the instance
    holds `n_features`, `means_` and `covars_` (both of shape
    `(n_components, n_features)`) and `weights_` (`n_components` values).
    The temporary yael model is deleted before returning.
    """
    n_samples, self.n_features = X.shape
    x_ref = yael.numpy_to_fvec_ref(X)

    flags = yael.GMM_FLAGS_W | yael.GMM_FLAGS_SIGMA | yael.GMM_FLAGS_MU
    learned = yael.gmm_learn(
        self.n_features,
        n_samples,
        self.n_components,
        self.n_iter,
        x_ref,
        self.n_threads,
        0,
        self.n_init,
        flags)

    # Convert the native parameter buffers to numpy arrays before the
    # native model is released.
    means = yael.fvec_to_numpy(
        learned.mu, self.n_components * self.n_features)
    self.means_ = means.reshape((self.n_components, self.n_features))

    covars = yael.fvec_to_numpy(
        learned.sigma, self.n_components * self.n_features)
    self.covars_ = covars.reshape((self.n_components, self.n_features))

    self.weights_ = yael.fvec_to_numpy(learned.w, self.n_components)

    yael.gmm_delete(learned)
def computeResponsabilities(self, X):
    """Return the component posteriors of the samples in `X`.

    The yael model is created lazily through `initYaelGmm` when
    `self.yael_gmm` is None. A 1-D `X` is treated as a single sample. The
    result has shape `(n_samples, n_components)`.
    """
    if self.yael_gmm is None:
        self.initYaelGmm()

    n_samples = 1 if len(X.shape) == 1 else X.shape[0]

    x_ref = yael.numpy_to_fvec_ref(X)
    # Zero-initialized native buffer receiving the posteriors.
    posteriors_buf = yael.fvec_new_0(self.n_components * n_samples)
    flags = yael.GMM_FLAGS_W | yael.GMM_FLAGS_SIGMA | yael.GMM_FLAGS_MU
    yael.gmm_compute_p_thread(
        n_samples,
        x_ref,
        self.yael_gmm,
        posteriors_buf,
        flags,
        self.n_threads)

    # The `_acquire` variant is used here, so no explicit free follows
    # (presumably the numpy array takes over the buffer -- confirm).
    posteriors = yael.fvec_to_numpy_acquire(
        posteriors_buf, n_samples * self.n_components)
    return posteriors.reshape((n_samples, self.n_components))
def descs_to_spatial_sstats(xx, ll, gmm):
    """
    Computes spatial statistics from descriptors and their positions.

    Inputs
    ------
    xx: array [N, D], required
        N D-dimensional descriptors from a video (usually, after they are
        processed with PCA).

    ll: array [N, 3], required
        Descriptor locations; each row is the triplet (x, y, t).

    gmm: instance of yael object gmm
        Gaussian mixture object.

    Output
    ------
    ss: array [K + 2 * 3 * K, ]
        float32 vector that concatenates (i) the averaged posteriors,
        (ii) the first-order moments of the locations `ll` weighted by the
        posteriors and (iii) the second-order moments of the locations `ll`
        weighted by the posteriors.

    """
    nr_descs = ll.shape[0]
    nr_clusters = gmm.k
    buf_len = nr_descs * nr_clusters

    # Posteriors are produced by yael in a native buffer that is freed once
    # its content has been converted to a numpy array.
    posteriors_buf = fvec_new(buf_len)
    gmm_compute_p(
        nr_descs, numpy_to_fvec_ref(xx), gmm, posteriors_buf, GMM_FLAGS_W)
    posteriors = fvec_to_numpy(posteriors_buf, buf_len)
    posteriors = posteriors.reshape(nr_descs, nr_clusters)
    yael.free(posteriors_buf)

    # All three statistics are averaged over the descriptors.
    avg_posteriors = sum(posteriors, 0) / nr_descs              # K values
    first_moments = dot(posteriors.T, ll).flatten() / nr_descs  # 3K values
    second_moments = dot(posteriors.T, ll**2).flatten() / nr_descs  # 3K values

    stacked = hstack((avg_posteriors, first_moments, second_moments))
    return np.array(stacked, dtype=np.float32)
def descs_to_spatial_sstats(xx, ll, gmm):
    """
    Builds the spatial sufficient statistics of a set of descriptors.

    Inputs
    ------
    xx: array [N, D], required
        N D-dimensional descriptors from a video (usually, after they are
        processed with PCA).

    ll: array [N, 3], required
        Descriptor locations; on each row, we have the triplet (x, y, t).

    gmm: instance of yael object gmm
        Gaussian mixture object.

    Output
    ------
    ss: array [K + 2 * 3 * K, ]
        float32 concatenation of (i) the averaged posteriors, (ii) the
        posterior-weighted mean of the locations `ll` and (iii) the
        posterior-weighted second-order moment of the locations `ll`.

    """
    n = ll.shape[0]
    k = gmm.k

    # Let yael compute the posteriors, copy them out, release the C buffer.
    q_buf = fvec_new(n * k)
    gmm_compute_p(n, numpy_to_fvec_ref(xx), gmm, q_buf, GMM_FLAGS_W)
    q = fvec_to_numpy(q_buf, n * k).reshape(n, k)
    yael.free(q_buf)

    q_t = q.T
    parts = (
        sum(q, 0) / n,                     # K values
        dot(q_t, ll).flatten() / n,        # 3K values
        dot(q_t, ll ** 2).flatten() / n,   # 3K values
    )
    return np.array(hstack(parts), dtype=np.float32)
# Demo of moving data between numpy arrays and yael vectors.
# NOTE(review): this excerpt starts mid-script; `numpy_a` used by the first
# section is defined before the visible part of the file.

# --- float array, numpy -> yael via numpy_to_fvec, then back to numpy ---
print('-> Yael A')
# numpy_to_fvec (no `_ref`) presumably returns a separate copy, which the
# FloatArray wrapper then takes over -- confirm with the yael bindings.
yael_a = yael.FloatArray.acquirepointer(yael.numpy_to_fvec(numpy_a))
n = numpy_a.size
yael.fvec_print(yael_a, n)
print('-> Numpy A')
print(yael.fvec_to_numpy(yael_a, n))

# --- same round trip for an int32 array ---
print('int array')
numpy_a = np.array(list(range(5)), dtype='int32')
print(numpy_a)
print('-> Yael A2')
yael_a = yael.IntArray.acquirepointer(yael.numpy_to_ivec(numpy_a))
n = numpy_a.size
yael.ivec_print(yael_a, n)
print('-> Numpy A2')
print(yael.ivec_to_numpy(yael_a, n))

# --- float array passed by reference ---
print('float array, pass by reference')
numpy_a = np.array(list(range(5)), dtype='float32')
print(numpy_a)
# Opt-in only: the messages below announce that this section is expected to
# crash. NOTE(review): presumably because the `_ref` pointer aliases the
# numpy buffer, which is dropped by `del numpy_a` before the last print.
if '--force-crash' in sys.argv:
    yael_a = yael.FloatArray.acquirepointer(yael.numpy_to_fvec_ref(numpy_a))
    n = numpy_a.size
    yael.fvec_print(yael_a, n)
    del numpy_a
    print('Forced Crash Example!')
    yael.fvec_print(yael_a, n)
def compute_gmm(data, nr_clusters, nr_iterations, nr_threads, seed, nr_redos):
    """Learn a GMM on `data` using yael's `gmm_learn`.

    `data` is a 2-D array with one sample per row; its buffer is passed to
    yael by reference. All other arguments are forwarded unchanged, together
    with `GMM_FLAGS_W`. Returns the object produced by `gmm_learn`.
    """
    nr_samples, nr_dims = data.shape
    return gmm_learn(
        nr_dims,
        nr_samples,
        nr_clusters,
        nr_iterations,
        numpy_to_fvec_ref(data),
        nr_threads,
        seed,
        nr_redos,
        GMM_FLAGS_W)