def test_predict(self):
    # define some easy training data and predict predictive distribution
    circle1 = Ring(variance=1, radius=3)
    circle2 = Ring(variance=1, radius=10)
    n = 100
    X = circle1.sample(n / 2).samples
    X = vstack((X, circle2.sample(n / 2).samples))
    y = ones(n)
    y[:n / 2] = -1.0

    # plot(X[:n/2,0], X[:n/2,1], 'ro')
    # hold(True)
    # plot(X[n/2:,0], X[n/2:,1], 'bo')
    # hold(False)
    # show()

    covariance = SquaredExponentialCovariance(1, 1)
    likelihood = LogitLikelihood()
    gp = GaussianProcess(y, X, covariance, likelihood)

    # predict on mesh
    n_test = 20
    P = linspace(X[:, 0].min() - 1, X[:, 0].max() + 1, n_test)
    Q = linspace(X[:, 1].min() - 1, X[:, 1].max() + 1, n_test)
    X_test = asarray(list(itertools.product(P, Q)))
    # Y_test = exp(LaplaceApproximation(gp).predict(X_test).reshape(n_test, n_test))
    Y_train = exp(LaplaceApproximation(gp).predict(X))
    print Y_train
    print Y_train > 0.5
    print y

def test_testAverage2(self):
    # More tests of average.
    w1 = [0, 1, 1, 1, 1, 0]
    w2 = [[0, 1, 1, 1, 1, 0], [1, 0, 0, 0, 0, 1]]
    x = arange(6, dtype=np.float_)
    assert_equal(average(x, axis=0), 2.5)
    assert_equal(average(x, axis=0, weights=w1), 2.5)
    y = array([arange(6, dtype=np.float_), 2.0 * arange(6)])
    assert_equal(average(y, None), np.add.reduce(np.arange(6)) * 3. / 12.)
    assert_equal(average(y, axis=0), np.arange(6) * 3. / 2.)
    assert_equal(average(y, axis=1),
                 [average(x, axis=0), average(x, axis=0) * 2.0])
    assert_equal(average(y, None, weights=w2), 20. / 6.)
    assert_equal(average(y, axis=0, weights=w2), [0., 1., 2., 3., 4., 10.])
    assert_equal(average(y, axis=1),
                 [average(x, axis=0), average(x, axis=0) * 2.0])
    m1 = zeros(6)
    m2 = [0, 0, 1, 1, 0, 0]
    m3 = [[0, 0, 1, 1, 0, 0], [0, 1, 1, 1, 1, 0]]
    m4 = ones(6)
    m5 = [0, 1, 1, 1, 1, 1]
    assert_equal(average(masked_array(x, m1), axis=0), 2.5)
    assert_equal(average(masked_array(x, m2), axis=0), 2.5)
    assert_equal(average(masked_array(x, m4), axis=0).mask, [True])
    assert_equal(average(masked_array(x, m5), axis=0), 0.0)
    assert_equal(count(average(masked_array(x, m4), axis=0)), 0)
    z = masked_array(y, m3)
    assert_equal(average(z, None), 20. / 6.)
    assert_equal(average(z, axis=0), [0., 1., 99., 99., 4.0, 7.5])
    assert_equal(average(z, axis=1), [2.5, 5.0])
    assert_equal(average(z, axis=0, weights=w2), [0., 1., 99., 99., 4.0, 10.0])

def test_equal_estimates(self):
    Log.set_loglevel(logging.DEBUG)
    rr = RussianRoulette(1e-5, block_size=100)
    log_estimates = randn(1000)
    log_estimates = ones(1000) * (-942478.011941)
    print rr.exponential(log_estimates)

def test_1d(self):
    # Tests mr_ on 1D arrays.
    assert_array_equal(mr_[1, 2, 3, 4, 5, 6], array([1, 2, 3, 4, 5, 6]))
    b = ones(5)
    m = [1, 0, 0, 0, 0]
    d = masked_array(b, mask=m)
    c = mr_[d, 0, 0, d]
    self.assertTrue(isinstance(c, MaskedArray))
    assert_array_equal(c, [1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1])
    assert_array_equal(c.mask, mr_[m, 0, 0, m])

def __init__(self, distribution, num_eigen=2,
             mean_est=array([-2.0, -2.0]), cov_est=0.05 * eye(2),
             sample_discard=500, sample_lag=10, accstar=0.234):
    AdaptiveMetropolis.__init__(self, distribution=distribution,
                                mean_est=mean_est, cov_est=cov_est,
                                sample_discard=sample_discard,
                                sample_lag=sample_lag, accstar=accstar)
    assert num_eigen <= distribution.dimension
    self.num_eigen = num_eigen
    self.dwscale = self.globalscale * ones([self.num_eigen])

    u, s, _ = svd(self.cov_est)
    self.eigvalues = s[0:self.num_eigen]
    self.eigvectors = u[:, 0:self.num_eigen]

def __init__(self, dimension=2, num_components=2, components=None,
             mixing_proportion=None):
    Distribution.__init__(self, dimension)

    self.num_components = num_components
    if components is None:
        self.components = [Gaussian(mu=zeros(self.dimension), Sigma=eye(self.dimension))
                           for _ in range(self.num_components)]
    else:
        assert len(components) == self.num_components
        self.components = components

    if mixing_proportion is None:
        self.mixing_proportion = Discrete((1.0 / num_components) * ones([num_components]))
    else:
        assert num_components == mixing_proportion.num_objects
        self.mixing_proportion = mixing_proportion

def test_testAverage3(self):
    # Yet more tests of average!
    a = arange(6)
    b = arange(6) * 3
    r1, w1 = average([[a, b], [b, a]], axis=1, returned=1)
    assert_equal(shape(r1), shape(w1))
    assert_equal(r1.shape, w1.shape)
    r2, w2 = average(ones((2, 2, 3)), axis=0, weights=[3, 1], returned=1)
    assert_equal(shape(w2), shape(r2))
    r2, w2 = average(ones((2, 2, 3)), returned=1)
    assert_equal(shape(w2), shape(r2))
    r2, w2 = average(ones((2, 2, 3)), weights=ones((2, 2, 3)), returned=1)
    assert_equal(shape(w2), shape(r2))
    a2d = array([[1, 2], [0, 4]], float)
    a2dm = masked_array(a2d, [[False, False], [True, False]])
    a2da = average(a2d, axis=0)
    assert_equal(a2da, [0.5, 3.0])
    a2dma = average(a2dm, axis=0)
    assert_equal(a2dma, [1.0, 3.0])
    a2dma = average(a2dm, axis=None)
    assert_equal(a2dma, 7. / 3.)
    a2dma = average(a2dm, axis=1)
    assert_equal(a2dma, [1.5, 4.0])

def sample_real(n=1, mean_top=[0, 2], std_top=0.2, weights_top=[0.5, 0.5],
                std_middle=0.2, std_bottom=0.2, weights_bottom=[0.5, 0.5]):
    """
    Samples n times from the model p(x1)p(x2|x1)p(x3|x1)p(x4|x2,x3)p(x5|x3),
    where all distributions' domains are the real line, i.e.,

    p(x1)              - mixture of 2 Gaussians with fixed means and fixed variance
    p(x2|x1), p(x3|x1) - Gaussian whose mean is given by the sample of x1, fixed variance
    p(x4|x2,x3)        - mixture of 2 Gaussians whose mean is either 0.5*(x2+x3) or zero, fixed variance
    p(x5|x3)           - mixture of 2 Gaussians whose mean is either x3 or zero, fixed variance

    Returns a dictionary {node_ind -> samples}
    """
    assert sum(weights_top) == 1
    assert sum(weights_bottom) == 1

    mean_x1 = ones(n) * mean_top[0]
    mean_x1[rand(n) < weights_top[0]] = mean_top[1]
    x1 = mean_x1 + randn(n) * std_top

    x2 = x1 + randn(n) * std_middle
    x3 = x1 + randn(n) * std_middle

    mean_x4 = (x2 + x3) * 0.5
    mean_x4[rand(n) < weights_bottom[0]] = 0
    x4 = mean_x4 + randn(n) * std_bottom

    mean_x5 = deepcopy(x3)
    mean_x5[rand(n) < weights_bottom[0]] = 0
    x5 = mean_x5 + randn(n) * std_bottom

    return {1: x1, 2: x2, 3: x3, 4: x4, 5: x5}

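# Minimal usage sketch (an addition, not part of the original module): assuming
# sample_real and its numpy/copy dependencies are imported as above, draw a batch
# of joint samples from the five-node model and print each node's sample mean.
if __name__ == "__main__":
    samples = sample_real(n=1000)
    for node_ind in sorted(samples.keys()):
        print("node %d: sample mean %.3f" % (node_ind, samples[node_ind].mean()))
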
def __init__(self, distribution, mean_est=None, cov_est=None,
             sample_discard=500, sample_lag=20, accstar=0.234):
    MCMCSampler.__init__(self, distribution)

    self.globalscale = (2.38 ** 2) / distribution.dimension

    if mean_est is None:
        mean_est = 2 * ones(distribution.dimension)
    if cov_est is None:
        cov_est = 0.05 * eye(distribution.dimension)

    assert len(mean_est) == distribution.dimension
    assert len(cov_est) == distribution.dimension

    self.mean_est = mean_est
    self.cov_est = cov_est
    self.sample_discard = sample_discard
    self.sample_lag = sample_lag
    self.accstar = accstar

def _calculateHitContainmentMatrix(self, curve_residuals, tau):
    '''
    Calculates a 2d array where the (i, j)th entry is the proportion of
    elements in curve_residuals[i]['coverage_indices'][tau] that are also
    contained in curve_residuals[j]['coverage_indices'][tau].

    A value of -1 indicates that the curve corresponding to the row index
    had no hits within tau of the curve.
    '''
    num_curves = len(curve_residuals)
    containment_matrix = ones((num_curves, num_curves))
    for i in range(num_curves):
        labels_i = curve_residuals[i]['coverage_indices'][tau]
        for j in range(i + 1, num_curves):
            labels_j = curve_residuals[j]['coverage_indices'][tau]
            cardinality_intersect = len(labels_i & labels_j)
            if len(labels_i) == 0:
                containment_matrix[i, j] = -1
            else:
                containment_matrix[i, j] = float(cardinality_intersect) / len(labels_i)
            if len(labels_j) == 0:
                containment_matrix[j, i] = -1
            else:
                containment_matrix[j, i] = float(cardinality_intersect) / len(labels_j)
    return containment_matrix

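# Hypothetical illustration (not from the original code) of the input format the
# method above expects: 'coverage_indices' maps a tolerance tau to a set of hit
# labels, and entry (i, j) is the fraction of curve i's hits shared with curve j.
toy_curve_residuals = [
    {'coverage_indices': {0.1: set([1, 2, 3])}},
    {'coverage_indices': {0.1: set([2, 3, 4])}},
]
# self._calculateHitContainmentMatrix(toy_curve_residuals, tau=0.1) would return
# [[1.0, 2/3.0], [2/3.0, 1.0]], since the two toy curves share 2 of 3 hits each.
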
# throw away some data
n = 250
seed(1)
idx = permutation(len(data))
idx = idx[:n]
data = data[idx]
labels = labels[idx]

# normalise and whiten dataset
data -= mean(data, 0)
L = cholesky(cov(data.T))
data = solve_triangular(L, data.T, lower=True).T
dim = shape(data)[1]

# prior on theta and posterior target estimate
theta_prior = Gaussian(mu=0 * ones(dim), Sigma=eye(dim) * 5)
target = PseudoMarginalHyperparameterDistribution(data, labels,
                                                  n_importance=100,
                                                  prior=theta_prior,
                                                  ridge=1e-3)

# create sampler
burnin = 10000
num_iterations = burnin + 300000
kernel = GaussianKernel(sigma=23.0)
sampler = KameleonWindowLearnScale(target, kernel, stop_adapt=burnin)
# sampler = AdaptiveMetropolisLearnScale(target)
# sampler = StandardMetropolis(target)

# posterior mode derived by initial tests
start = zeros(target.dimension)
params = MCMCParams(start=start, num_iterations=num_iterations, burnin=burnin)

data = vstack((data_circle, data_rect))
labels = hstack((labels_circle, labels_rect))
dim = shape(data)[1]

# normalise data
data -= mean(data, 0)
data /= std(data, 0)

# plot
idx_a = labels > 0
idx_b = labels < 0
plot(data[idx_a, 0], data[idx_a, 1], "ro")
plot(data[idx_b, 0], data[idx_b, 1], "bo")

# prior on theta and posterior target estimate
theta_prior = Gaussian(mu=0 * ones(dim), Sigma=eye(dim) * 5)
target = PseudoMarginalHyperparameterDistribution(data, labels,
                                                  n_importance=100,
                                                  prior=theta_prior,
                                                  ridge=1e-3)

# create sampler
burnin = 10000
num_iterations = burnin + 300000
kernel = GaussianKernel(sigma=35.0)
sampler = KameleonWindowLearnScale(target, kernel, stop_adapt=burnin)
# sampler = AdaptiveMetropolisLearnScale(target)
# sampler = StandardMetropolis(target)

start = 0.0 * ones(target.dimension)
params = MCMCParams(start=start, num_iterations=num_iterations, burnin=burnin)

def fun(k, x, y):
    print('haha')
    cost = (k * x - y) ** 2
    w_grad = 1
    return [cost, w_grad]


def fun1():
    return 1


k0 = 1
x = 1
y = 1
max_iterations = 10
# scipy.optimize.minimize(fun, k0, args=(x, y,), options={'maxiter': max_iterations})
# scipy.optimize.leastsq(fun1,)

from numpy import ones, dot

a = ones(2)
b = ones(2).reshape(2, 1)
b[0, 0] = 2
b[1, 0] = 4
print(a, b)
print(a * b)
print(dot(a, b))
print('-------------------------')

import sys
sys.path.append(r'D:\eclipse_workspace\py_base\src')

import kNN  # local module on the path appended above

group, labels = kNN.createDataSet()
print('group:', group)
print('labels:', labels)
res = kNN.classify0([0, 0], group, labels, 3)

# loop over parameters here
experiment_dir = experiment_dir_base + str(os.path.abspath(sys.argv[0])).split(os.sep)[-1].split(".")[0] + os.sep
print "running experiments", n, "times at base", experiment_dir

# load data
data, labels = GPData.get_glass_data()

# normalise and whiten dataset
data -= mean(data, 0)
L = cholesky(cov(data.T))
data = solve_triangular(L, data.T, lower=True).T
dim = shape(data)[1]

# prior on theta and posterior target estimate
theta_prior = Gaussian(mu=0 * ones(dim), Sigma=eye(dim) * 5)
distribution = PseudoMarginalHyperparameterDistribution(data, labels,
                                                        n_importance=100,
                                                        prior=theta_prior,
                                                        ridge=1e-3)

sigma = 23.0
print "using sigma", sigma
kernel = GaussianKernel(sigma=sigma)

for i in range(n):
    mcmc_samplers = []

    burnin = 20000
    num_iterations = 100000

def _compute_coefs(self, xx, yy, p=None, var=1):
    x, y = np.atleast_1d(xx, yy)
    x = x.ravel()
    dx = np.diff(x)
    must_sort = (dx < 0).any()
    if must_sort:
        ind = x.argsort()
        x = x[ind]
        y = y[..., ind]
        dx = np.diff(x)

    n = len(x)

    # ndy = y.ndim
    szy = y.shape

    nd = prod(szy[:-1])
    ny = szy[-1]

    if n < 2:
        raise ValueError('There must be >=2 data points.')
    elif (dx <= 0).any():
        raise ValueError('Two consecutive values in x can not be equal.')
    elif n != ny:
        raise ValueError('x and y must have the same length.')

    dydx = np.diff(y) / dx

    if (n == 2):  # % straight line
        coefs = np.vstack([dydx.ravel(), y[0, :]])
    else:
        dx1 = 1. / dx
        D = sparse.spdiags(var * ones(n), 0, n, n)  # The variance

        u, p = self._compute_u(p, D, dydx, dx, dx1, n)
        dx1.shape = (n - 1, -1)
        dx.shape = (n - 1, -1)
        zrs = zeros(nd)
        if p < 1:
            # faster than yi-6*(1-p)*Q*u
            Qu = D * diff(vstack([zrs,
                                  diff(vstack([zrs, u, zrs]), axis=0) * dx1,
                                  zrs]), axis=0)
            ai = (y - (6 * (1 - p) * Qu).T).T
        else:
            ai = y.reshape(n, -1)

        # The piecewise polynomials are written as
        #   fi = ai + bi*(x-xi) + ci*(x-xi)^2 + di*(x-xi)^3
        # where the derivatives in the knots according to Carl de Boor are:
        #   ddfi  = 6*p*[0;u] = 2*ci;
        #   dddfi = 2*diff([ci;0])./dx = 6*di;
        #   dfi   = diff(ai)./dx - (ci+di.*dx).*dx = bi;
        ci = np.vstack([zrs, 3 * p * u])
        di = (diff(vstack([ci, zrs]), axis=0) * dx1 / 3)
        bi = (diff(ai, axis=0) * dx1 - (ci + di * dx) * dx)
        ai = ai[:n - 1, ...]
        if nd > 1:
            di = di.T
            ci = ci.T
            ai = ai.T
        if not any(di):
            if not any(ci):
                coefs = vstack([bi.ravel(), ai.ravel()])
            else:
                coefs = vstack([ci.ravel(), bi.ravel(), ai.ravel()])
        else:
            coefs = vstack([di.ravel(), ci.ravel(), bi.ravel(), ai.ravel()])

    return coefs, x

def incomplete_cholesky(X, kernel, eta, power=1, blocksize=100):
    """
    Computes the incomplete Cholesky factorisation of the kernel matrix defined
    by samples X and a given kernel. The kernel is evaluated on-the-fly. The
    optional power parameter is used to multiply the kernel output with itself.

    Original code from "Kernel Methods for Pattern Analysis" by Shawe-Taylor and
    Cristianini. Modified to compute the kernel on the fly, to use kernels
    multiplied with themselves (tensor product), and to optimise speed via
    vector operations and by not pre-allocating the full kernel matrix, but
    rather allocating memory for the low-rank kernel block-wise.
    Changes by Heiko Strathmann.

    parameters:
    X         - list of input vectors to evaluate the kernel on
    kernel    - a kernel object with a kernel method that takes 2d-arrays and
                returns a psd kernel matrix
    eta       - precision cutoff parameter for the low-rank approximation.
                Lies in (0,1), where smaller means more accurate.
    power     - every kernel evaluation is multiplied with itself this number
                of times. Zero is supported.
    blocksize - tuning parameter for speed, determines how many rows are
                allocated in a block for the (growing) kernel matrix. Larger
                means a faster algorithm (to some extent, if the low-rank
                dimension is larger than blocksize).

    output: K_chol, I, R, W, where
    K_chol - is the kernel matrix restricted to the pivot index features
    I      - is a list containing the pivots used to compute K_chol
    R      - is a low-rank factor such that R.T.dot(R) approximates the original K
    W      - is a matrix such that W.T.dot(K_chol.dot(W)) approximates the original K
    """
    assert eta > 0 and eta < 1
    assert power >= 0
    assert blocksize >= 0
    assert len(X) >= 0

    m = len(X)

    # growing low rank basis
    R = zeros((blocksize, m))

    # diagonal (assumed to be one)
    d = ones(m)

    # used indices
    I = []
    nu = []

    # algorithm is executed as long as a is bigger than eta precision
    a = d.max()
    I.append(d.argmax())

    # growing set of evaluated kernel values
    K = zeros((blocksize, m))

    j = 0
    while a > eta:
        nu.append(sqrt(a))

        if power >= 1:
            K[j, :] = kernel.kernel([X[I[j]]], X) ** power
        else:
            K[j, :] = ones(m)

        if j == 0:
            R_dot_j = 0
        elif j == 1:
            R_dot_j = R[:j, :] * R[:j, I[j]]
        else:
            R_dot_j = R[:j, :].T.dot(R[:j, I[j]])

        R[j, :] = (K[j, :] - R_dot_j) / nu[j]
        d = d - R[j, :] ** 2
        a = d.max()
        I.append(d.argmax())
        j = j + 1

        # allocate more space for kernel
        if j >= len(K):
            K = vstack((K, zeros((blocksize, m))))
            R = vstack((R, zeros((blocksize, m))))

    # remove un-used rows which were allocated unnecessarily
    K = K[:j, :]
    R = R[:j, :]

    # remove last pivot index since it is not used
    I = I[:-1]

    # from low rank to full rank
    W = solve(R[:, I], R)

    # low rank K
    K_chol = K[:, I]

    return K_chol, I, R, W

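# Minimal usage sketch (an addition, not part of the original module): it assumes
# the numpy names used above (zeros, ones, sqrt, vstack) and a linear solve() are
# imported at module level, and defines a hypothetical ToyGaussianKernel exposing
# the kernel(X1, X2) method described in the docstring. It then checks that
# R.T.dot(R) approximates the full kernel matrix.
import numpy as np
from numpy.linalg import norm


class ToyGaussianKernel(object):
    """Toy squared-exponential kernel, only for illustration."""
    def __init__(self, sigma=1.0):
        self.sigma = sigma

    def kernel(self, X1, X2):
        # pairwise squared distances between rows of X1 and X2
        X1 = np.asarray(X1)
        X2 = np.asarray(X2)
        sq_dists = ((X1[:, np.newaxis, :] - X2[np.newaxis, :, :]) ** 2).sum(-1)
        return np.exp(-sq_dists / (2 * self.sigma ** 2))


if __name__ == "__main__":
    X = list(np.random.randn(200, 3))
    k = ToyGaussianKernel(sigma=5.0)
    K_chol, I, R, W = incomplete_cholesky(X, k, eta=0.001)
    K_full = k.kernel(X, X)
    print("low-rank dimension: %d" % R.shape[0])
    print("approximation error: %.2e" % norm(K_full - R.T.dot(R)))
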