def gaussian_dd(n_samples, n_features=2): ##{{{
    """
    SBCK.datasets.gaussian_dd
    =========================

    Draw three multivariate normal samples X0, X1 and Y0, each with its own
    random covariance matrix produced by ``skd.make_spd_matrix``.

    Parameters
    ----------
    n_samples : integer
        Number of rows drawn for each of X0, X1 and Y0
    n_features : integer
        Dimension of each sample, default is 2

    Returns
    -------
    Y0,X0,X1 : tuple
        - Y0 reference dataset in calibration period (mean -2)
        - X0 biased dataset in calibration period (mean 0)
        - X1 biased dataset in projection period (mean 5)
    """
    # Keep this order (X0, X1, Y0): covariance and sample are drawn from the
    # global NumPy generator one dataset after the other.
    drawn = {}
    for label, offset in (("X0", 0), ("X1", 5), ("Y0", -2)):
        center = np.zeros(n_features) + offset
        spread = skd.make_spd_matrix(n_features)
        drawn[label] = np.random.multivariate_normal(
            mean=center, cov=spread, size=n_samples
        )
    return drawn["Y0"], drawn["X0"], drawn["X1"]
def __init__(self, rng, n_samples=200, n_components=2, n_features=2, scale=50):
    """Create random mixture parameters and one dataset per covariance type.

    Parameters
    ----------
    rng : object exposing ``rand`` and accepted as ``random_state``
        Source of all random draws made here.
    n_samples, n_components, n_features : int
        Stored on the instance and used to size the generated arrays.
    scale : number
        Multiplier applied to the uniform draws used as component means.
    """
    self.n_samples = n_samples
    self.n_components = n_components
    self.n_features = n_features

    # The draws below consume ``rng`` in a fixed order; do not reorder them.
    raw_weights = rng.rand(n_components)
    self.weights = raw_weights / raw_weights.sum()
    self.means = rng.rand(n_components, n_features) * scale

    spherical_cov = .5 + rng.rand(n_components)
    diag_cov = (.5 + rng.rand(n_components, n_features)) ** 2
    tied_cov = make_spd_matrix(n_features, random_state=rng)
    full_cov = np.array([
        make_spd_matrix(n_features, random_state=rng) * .5
        for _ in range(n_components)
    ])
    self.covariances = {
        'spherical': spherical_cov,
        'diag': diag_cov,
        'tied': tied_cov,
        'full': full_cov,
    }

    # Precisions are the (element-wise or matrix) inverses of the covariances.
    self.precisions = {
        'spherical': 1. / spherical_cov,
        'diag': 1. / diag_cov,
        'tied': linalg.inv(tied_cov),
        'full': np.array([linalg.inv(component_cov)
                          for component_cov in full_cov]),
    }

    samples_per_type = [
        generate_data(n_samples, n_features, self.weights, self.means,
                      self.covariances, covar_type)
        for covar_type in COVARIANCE_TYPE
    ]
    self.X = dict(zip(COVARIANCE_TYPE, samples_per_type))

    # One run of the component index per component, sized by its weight.
    label_runs = []
    for k, w in enumerate(self.weights):
        run_length = int(np.round(w * n_samples))
        label_runs.append(np.full(run_length, k, dtype=int))
    self.Y = np.hstack(label_runs)
def main():
    """Plot two random 2x2 SPD matrices together with ``avg_func`` of them.

    ``avg_func`` is called with the two matrices and ``0.5``; the three
    matrices are then handed to ``plot_data``.
    """
    first, second = make_spd_matrix(2), make_spd_matrix(2)
    blended = avg_func(first, second, 0.5)
    plot_data(first, second, blended)
def __init__(self, rng, n_samples=200, n_components=2, n_features=2, scale=50):
    """Set up random mixture parameters and sampled data for every covariance type.

    All randomness comes from ``rng`` (it must provide ``rand`` and be usable
    as ``random_state`` for ``make_spd_matrix``).  The sizes are stored on the
    instance; ``scale`` multiplies the uniform draws used as component means.
    """
    self.n_samples, self.n_components, self.n_features = (
        n_samples,
        n_components,
        n_features,
    )

    # NOTE: the sequence of ``rng`` calls is part of the behaviour.
    self.weights = rng.rand(n_components)
    self.weights = self.weights / self.weights.sum()
    self.means = scale * rng.rand(n_components, n_features)

    covariances = {}
    covariances["spherical"] = 0.5 + rng.rand(n_components)
    covariances["diag"] = (0.5 + rng.rand(n_components, n_features)) ** 2
    covariances["tied"] = make_spd_matrix(n_features, random_state=rng)
    covariances["full"] = np.array(
        [
            make_spd_matrix(n_features, random_state=rng) * 0.5
            for _ in range(n_components)
        ]
    )
    self.covariances = covariances

    # Inverse of each covariance representation.
    self.precisions = {
        "spherical": 1.0 / covariances["spherical"],
        "diag": 1.0 / covariances["diag"],
        "tied": linalg.inv(covariances["tied"]),
        "full": np.array([linalg.inv(cov) for cov in covariances["full"]]),
    }

    generated = []
    for covar_type in COVARIANCE_TYPE:
        generated.append(
            generate_data(
                n_samples,
                n_features,
                self.weights,
                self.means,
                self.covariances,
                covar_type,
            )
        )
    self.X = dict(zip(COVARIANCE_TYPE, generated))

    # Component index repeated round(weight * n_samples) times, concatenated.
    self.Y = np.hstack(
        [
            np.full(int(np.round(weight * n_samples)), component, dtype=int)
            for component, weight in enumerate(self.weights)
        ]
    )
def test_Metric(self):
    """Check that ``Metric`` built from a random SPD matrix M is consistent.

    For each dataset: L^T L must equal M, pairwise distances of the
    transformed points must equal the Mahalanobis distances under M, and the
    shapes of L, M and X must agree.
    """
    np.random.seed(28)
    for load_dataset in [iris, wine, breast_cancer]:
        X, y = load_dataset()
        _, n_features = X.shape
        M = make_spd_matrix(n_features)

        metric = Metric(M)
        metric.fit(X, y)
        L = metric.transformer()
        assert_array_almost_equal(L.T.dot(L), M)

        embedded = [metric.transform(), metric.transform(X)]
        embedded_dists = [pdist(points) for points in embedded]
        mahalanobis_dists = pdist(X, metric='mahalanobis', VI=M)

        # Checking that d_M = d_L
        for dists in embedded_dists:
            assert_array_almost_equal(mahalanobis_dists, dists)

        _, l_cols = L.shape
        m_rows, m_cols = M.shape
        assert_equal(l_cols, m_rows)
        assert_equal(l_cols, m_cols)
        assert_equal(l_cols, X.shape[1])
def generate_data_bivariate(request):
    """Simulate a data set with covariates, one outcome and two treatments.

    ``request.param`` is indexed as ``(n, p)``: number of rows and number of
    covariates.  The global NumPy generator is seeded with 1111 first.

    Returns
    -------
    pandas.DataFrame
        Columns ``X1..Xp``, ``y``, ``d1``, ``d2``.
    """
    n_p = request.param
    np.random.seed(1111)

    # setting parameters
    n_obs, n_covariates = n_p[0], n_p[1]
    theta = np.array([0.5, 0.9])
    b = [1 / k for k in range(1, n_covariates + 1)]
    sigma = make_spd_matrix(n_covariates)

    # generating data (the order of the random draws is significant)
    x = np.random.multivariate_normal(np.zeros(n_covariates), sigma,
                                      size=[n_obs, ])
    G = _g(np.dot(x, b))
    treatment_means = [_m(np.dot(x, b)), _m2(np.dot(x, b))]
    treatments = [
        mean + np.random.standard_normal(size=[n_obs, ])
        for mean in treatment_means
    ]
    D0, D1 = treatments
    y = theta[0] * D0 + theta[1] * D1 + G \
        + np.random.standard_normal(size=[n_obs, ])
    d = np.column_stack((D0, D1))

    covariate_names = [f'X{i + 1}' for i in range(n_covariates)]
    treatment_names = [f'd{i + 1}' for i in range(2)]
    column_names = covariate_names + ['y'] + treatment_names
    return pd.DataFrame(np.column_stack((x, y, d)), columns=column_names)
def generate_general(nv, m, ns, normalize=False, shuffle=False):
    """ Generate general data using make_spd_matrix() function.

    The ground-truth covariance is block diagonal: ``m`` blocks of size
    ``nv // m``, each drawn with make_spd_matrix().

    :param nv: Number of observed variables (must be divisible by ``m``)
    :param m: Number of latent factors, i.e. number of diagonal blocks
    :param ns: Number of samples to draw
    :param normalize: Whether to set Var[x] = 1 (each block is rescaled to
        unit diagonal)
    :param shuffle: Whether to shuffle the x_i's (rows and columns of the
        covariance are permuted with the same random permutation)
    :return: (data, ground_truth_cov)
    """
    assert nv % m == 0
    b = nv // m  # block size

    sigma = np.zeros((nv, nv))
    for i in range(m):
        block_cov = make_spd_matrix(b)
        if normalize:
            # Divide rows and columns by the standard deviations so the
            # block becomes a correlation matrix.
            std = np.sqrt(block_cov.diagonal()).reshape((b, 1))
            block_cov /= std
            block_cov /= std.T
        sigma[i * b:(i + 1) * b, i * b:(i + 1) * b] = block_cov

    if shuffle:
        # random.shuffle needs a mutable sequence; a bare range() object
        # raises TypeError on Python 3.
        perm = list(range(nv))
        random.shuffle(perm)
        # sigma_perm[i, j] = sigma[perm[i], perm[j]]
        sigma = sigma[np.ix_(perm, perm)]

    mu = np.zeros((nv,))
    return np.random.multivariate_normal(mu, sigma, size=(ns,)), sigma
def generate_data_iivm_binary(request):
    """Simulate covariates with binary outcome, treatment and instrument.

    ``request.param`` is indexed as ``(n, p)``.  The global NumPy generator
    is seeded with 1111 first.  ``b[4]`` is read, so ``p`` must be at least 5.

    Returns
    -------
    tuple
        ``(x, y, d, z)``.
    """
    n_p = request.param
    np.random.seed(1111)

    # setting parameters
    n_obs, n_covariates = n_p[0], n_p[1]
    theta = 0.5
    b = [1 / k for k in range(1, n_covariates + 1)]
    sigma = make_spd_matrix(n_covariates)

    def draw_noise():
        return np.random.standard_normal(size=[n_obs, ])

    # generating data (the order of the random draws is significant)
    x = np.random.multivariate_normal(np.zeros(n_covariates), sigma,
                                      size=[n_obs, ])
    G = _g(np.dot(x, b))

    z_score = x[:, 0] * (-1) * b[4] + x[:, 1] * b[2] + draw_noise()
    prz = 1 / (1 + np.exp((-1) * z_score))
    z = np.random.binomial(p=prz, n=1, size=[n_obs, ])

    u = draw_noise()
    d_score = (0.5 * z + x[:, 0] * (-0.5) + x[:, 1] * 0.25 - 0.5 * u
               + draw_noise())
    pr = 1 / (1 + np.exp((-1) * d_score))
    d = np.random.binomial(p=pr, n=1, size=[n_obs, ])

    err = np.random.standard_normal(n_obs)
    pry = 1 / (1 + np.exp((-1) * theta * d + G + 4 * u + err))
    y = np.random.binomial(p=pry, n=1, size=[n_obs, ])

    return x, y, d, z
def init_norm_mix(self, nnorm=4):
    """Initialise a random normal mixture with ``nnorm`` components (hardest).

    Weights are normalised uniform draws.  The first ``nnorm - 1`` means and
    covariances are random; the last mean is minus the sum of the others and
    the last covariance is derived from ``self.variance`` and
    ``self.incomplete_comb``.  Results are stored in ``self.weights``,
    ``self.means`` and ``self.covs``.
    """
    weights = np.random.uniform(1, 10, size=nnorm)
    weights /= sum(weights)

    # Beyond ten dimensions the extra coordinates get a much smaller spread.
    scale = 2 if self.d <= 10 else [2] * 10 + ([.1] * (self.d - 10))

    n_free = nnorm - 1
    meanm = []
    for _ in range(n_free):
        meanm.append(np.random.normal(loc=0, scale=scale, size=self.d))
    meanm.append(-np.sum(meanm, axis=0))
    covm = [make_spd_matrix(self.d) for _ in range(n_free)]

    # Rescale means by sqrt(fac) and covariances by fac, where fac is 0.9
    # over the largest eigenvalue of the unscaled combination.
    unscaled_comb = self.incomplete_comb(weights, meanm, covm)
    fac = .9 / max(np.linalg.eig(unscaled_comb)[0])
    meanm = np.multiply(meanm, np.sqrt(fac))
    covm = np.multiply(covm, fac)

    comb = self.incomplete_comb(weights, meanm, covm)
    target = self.variance * np.eye(self.d)
    last_cov = (target - comb) / weights[-1]
    last_cov = last_cov.reshape((1, ) + last_cov.shape)
    covm = np.concatenate((covm, last_cov), axis=0)

    self.weights = weights.flatten()
    self.means = meanm
    self.covs = covm
def main():
    """Sample Gaussian data, hide entries at random, impute them and plot.

    Three Hinton diagrams are saved and shown: the observed data (missing
    entries drawn as 0), the full data, and the imputation computed with the
    true ``mu`` and ``sigma``.
    """
    np.random.seed(12)
    data_dim, n_data = 8, 10
    threshold_missing = 0.5

    mu = np.random.randn(data_dim, 1)
    # Random symmetric positive-definite covariance matrix
    sigma = make_spd_matrix(n_dim=data_dim)

    x_full = gauss.gauss_sample(mu, sigma, n_data)
    missing = np.random.rand(n_data, data_dim) < threshold_missing
    x_miss = np.copy(x_full)
    x_miss[missing] = np.nan
    x_imputed = gauss.gauss_impute(mu, sigma, x_miss)

    # Replace the NaNs with 0s so the observed data can be drawn as a
    # Hinton diagram.
    xmiss0 = np.copy(x_miss)
    xmiss0[np.isnan(x_miss)] = 0

    panels = (
        (xmiss0, 'Observed', "gauss_impute_observed.pdf"),
        (x_full, 'Hidden truth', "gauss_impute_truth.pdf"),
        (x_imputed, 'Imputation with true params', "gauss_impute_pred.pdf"),
    )
    for fig_number, (values, title, filename) in enumerate(panels, start=1):
        figure = plt.figure(fig_number)
        pml.hinton_diagram(values, ax=figure.gca())
        figure.suptitle(title)
        pml.savefig(filename, dpi=300)
    plt.show()
def Initialize_parameters(self, data_points):
    '''
    Randomly initialize the parameters of each Gaussian distribution.

    * ``self.means``: the first ``n_clusters`` rows of a random permutation
      of ``data_points``.
    * ``self.covariances``: ``n_clusters`` random symmetric positive-definite
      matrices whose side is the last dimension of ``data_points``.
    * ``self.weights``: one Dirichlet draw, so the weights sum to one.
    * ``self.r``: zeros of shape (n_clusters, number of data points).

    Parameters
    ----------
    data_points : ndarray
        A 2D numpy array of all the data points (rows) by data dimension.

    Returns
    -------
    None.

    '''
    from sklearn.datasets import make_spd_matrix

    n_points = data_points.shape[0]
    n_dims = data_points.shape[-1]

    self.means = np.random.permutation(data_points)[:self.n_clusters]

    self.covariances = np.zeros((self.n_clusters, n_dims, n_dims))
    for slot in self.covariances:
        # ``slot`` is a view, so this fills the stacked array in place.
        slot[...] = make_spd_matrix(n_dims)

    self.weights = np.random.dirichlet(np.ones(self.n_clusters), size=1)[0]
    self.r = np.zeros((self.n_clusters, n_points))
def convex_quad_min():
    """Build a fixed 10-dimensional convex quadratic problem and its solution.

    The objective is ``0.5 * x @ H @ x + c @ x`` with ``H`` a seeded SPD
    matrix plus ``1e-4 * I`` and ``c`` drawn uniformly from [-5, 5].

    Returns
    -------
    fobj : function
        Convex, quadratic objective function
    fgrad : function
        Gradient of the objective
    fhess : function
        Hessian of the objective (constant)
    sol : numpy.ndarray
        Global minimizer, obtained by solving ``H x = -c``
    """
    seed = 7
    n_dim = 10
    rng = np.random.default_rng(seed)

    # positive definite Hessian: SPD matrix plus a scaled identity
    hess = make_spd_matrix(n_dim, random_state=seed)
    hess += 1e-4 * np.eye(n_dim)
    # linear coefficients
    coef = rng.uniform(low=-5., high=5., size=n_dim)

    def fobj(x):
        return 0.5 * x @ hess @ x + coef @ x

    def fgrad(x):
        return hess @ x + coef

    def fhess(x):
        return hess

    sol = linalg.solve(hess, -coef, check_finite=False, assume_a="pos")
    return fobj, fgrad, fhess, sol
def _initialize_core(self, mu=None, sigma=None, delta=None):
    """
    Initialize a Core within the data space.

    If ``mu``, ``sigma`` and ``delta`` are all given they are used as-is.
    Otherwise (including when only some of them are given) a random Core is
    created: the mean is ``self.random_state.rand(self.dim)`` rescaled to
    ``self._data_range``, the covariance comes from ``make_spd_matrix`` and
    the weight is ``1 / self.init_cores``.

    Parameters
    ----------
    mu : array-like, shape (n_features,), default=None
        Mean of the Core.

    sigma : array-like, shape (n_features, n_features), default=None
        Covariance of the Core.

    delta : array-like, shape (n_features,), default=None
        Weight of the Core.

    Returns
    -------
    core : Core
        The newly built Core.

    Raises
    ------
    RuntimeError
        If a random Core is needed but ``self._data_range`` is None.
    """
    if mu is not None and sigma is not None and delta is not None:
        return Core(mu=mu, sigma=sigma, delta=delta)

    if self._data_range is None:
        raise RuntimeError(
            "Data Range hasn't been set, likely because GMM hasn't been initialized yet"
        )

    low, high = self._data_range[0], self._data_range[1]
    mu = self.random_state.rand(self.dim) * (high - low) + low
    # Draw the covariance from the same generator as the mean; without
    # random_state the matrix came from the global NumPy generator and a
    # seeded instance was not reproducible.
    sigma = make_spd_matrix(self.dim, random_state=self.random_state)
    delta = np.ones((1)) / self.init_cores
    return Core(mu=mu, sigma=sigma, delta=delta)
def toy_data():
    """Build three toy ``GaussianData`` sets and the ``CrossCov`` between them.

    The data sets are named "A", "B", "C" with 10, 20 and 30 points.  Their
    covariances are the diagonal blocks of one seeded 60x60 SPD matrix and
    the cross covariances are its upper off-diagonal blocks.

    Returns
    -------
    (list of GaussianData, CrossCov)
    """
    names = ("A", "B", "C")
    sizes = (10, 20, 30)

    xs, ys = [], []
    for size in sizes:
        xs.append(np.arange(size))
        ys.append(np.random.random(size))

    # Generate arbitrary covariance matrix, partition into parts
    full_cov = make_spd_matrix(sum(sizes), random_state=1234)
    bounds = [0]
    for size in sizes:
        bounds.append(bounds[-1] + size)
    spans = [slice(lo, hi) for lo, hi in zip(bounds[:-1], bounds[1:])]

    datasets = [
        GaussianData(name, x, y, full_cov[span, span])
        for name, x, y, span in zip(names, xs, ys, spans)
    ]

    cross_cov = CrossCov({
        (names[i], names[j]): full_cov[spans[i], spans[j]]
        for i, j in ((0, 1), (0, 2), (1, 2))
    })

    return datasets, cross_cov
def run_test_conv(in_features, n_iterations=1000, size=1000, printing_step=10):
    """Train a ``Convolution_Autoencoder`` to reconstruct random SPD matrices.

    Parameters
    ----------
    in_features : int
        Side length of each generated matrix.
    n_iterations : int
        The loop runs epochs ``1 .. n_iterations - 1``.
    size : int
        Number of matrices generated; the last 30 are held out for testing.
    printing_step : int
        Losses are printed every ``printing_step`` epochs.

    Returns
    -------
    (coder, dataset)
        The trained model and the full data tensor of shape
        ``(size, 1, in_features, in_features)``.
    """
    coder = Convolution_Autoencoder(in_features**2)
    dataset = [FloatTensor(make_spd_matrix(in_features)).unsqueeze(0)
               for _ in range(size)]
    dataset = Variable(torch.cat(dataset, 0).reshape(size, 1, in_features, in_features))
    test_size = 30
    train_dataset = dataset[:-test_size]
    test_dataset = dataset[-test_size:]
    optimizer = optim.Adam(coder.parameters(), lr=0.1)
    loss_function = nn.MSELoss()
    for epoch in tqdm(range(1, n_iterations)):
        optimizer.zero_grad()
        outputs_train = coder(train_dataset)
        outputs_test = coder(test_dataset)
        loss_train = loss_function(outputs_train, train_dataset)
        loss_test = loss_function(outputs_test, test_dataset)
        # The graph is rebuilt every epoch, so it does not need to be retained.
        loss_train.backward()
        if epoch % printing_step == 0:
            # float() works on scalar losses; indexing them with [0] raises
            # on current PyTorch releases.
            print("EPOCH: {0}, TRAIN LOSS: {1}, TEST LOSS".format(epoch, float(loss_train)),
                  float(loss_test))
        if epoch == 1000:
            # Divide the learning rate by 10.  The original compared (==) a
            # value on a state_dict() copy, which had no effect.  Only reached
            # when n_iterations > 1000.
            for param_group in optimizer.param_groups:
                param_group['lr'] = param_group['lr'] * 0.1
        optimizer.step()
    return coder, dataset
def run_detetmenant(in_features, n_iterations=100, size=100):
    """Train a ``DetNet`` on random SPD matrices with an MSE criterion.

    Parameters
    ----------
    in_features : int
        Side length of each generated matrix.
    n_iterations : int
        The loop runs epochs ``1 .. n_iterations - 1``.
    size : int
        Number of matrices generated; the last 30 are held out for testing.

    Returns
    -------
    (coder, dataset)
        The trained model and the full data tensor.
    """
    coder = DetNet(in_features)
    dataset = [FloatTensor(make_spd_matrix(in_features)).unsqueeze(0)
               for _ in range(size)]
    dataset = Variable(torch.cat(dataset, 0))
    test_size = 30
    train_dataset = dataset[:-test_size]
    test_dataset = dataset[-test_size:]
    optimizer = optim.Adam(coder.parameters(), lr=0.1)
    criterion = MSELoss()
    for epoch in tqdm(range(1, n_iterations)):
        optimizer.zero_grad()
        outputs_train = coder(train_dataset)
        outputs_test = coder(test_dataset)
        loss_train = criterion(outputs_train, train_dataset)
        loss_test = criterion(outputs_test, test_dataset)
        # The graph is rebuilt every epoch, so it does not need to be retained.
        loss_train.backward()
        if epoch % 10 == 0:
            # float() works on scalar losses; indexing them with [0] raises
            # on current PyTorch releases.
            print("EPOCH: {0}, TRAIN LOSS: {1}, TEST LOSS".format(epoch, float(loss_train)),
                  float(loss_test))
        optimizer.step()
    return coder, dataset
def test_forest(self):
    """
    Tests the forest construction in two steps: first the forest must equal
    the MST when the correlation matrix comes from random data, then for a
    hand-written degenerate matrix the forest must have more edges than the
    MST while staying connected.
    """
    n_vars = 10
    zero_mean = np.zeros(n_vars)
    covariance = make_spd_matrix(n_vars)
    samples = np.random.multivariate_normal(zero_mean, covariance, 200)
    corr = np.corrcoef(samples.T)
    nodes = list(np.arange(n_vars))

    mst = topcorr.mst(corr)
    forest = topcorr.mst_forest(corr)
    assert_array_almost_equal(
        nx.to_numpy_array(mst, nodes),
        nx.to_numpy_array(forest, nodes),
    )

    example_mat = np.array([
        [0, 0.1, 0.3, 0.2, 0.1],
        [0.1, 0, 0.3, 0.4, 1.7],
        [0.3, 0.3, 0, 0.6, 0.5],
        [0.2, 0.4, 0.6, 0, 0.2],
        [0.1, 1.7, 0.5, 0.2, 0],
    ])
    example_corr = 1 - np.power(example_mat, 2) / 2
    forest = topcorr.mst_forest(example_corr)
    mst = topcorr.mst(example_corr)

    n_forest_edges = len(forest.edges)
    n_mst_edges = len(mst.edges)
    assert (n_forest_edges > n_mst_edges)
    assert (nx.is_connected(forest))
def AlphaPrior():
    """Draw one value from a 2-D normal prior and return it with its parameters.

    The mean is the module-level ``alpha_mean_vec``; the covariance is a
    random 2x2 SPD matrix.

    Returns
    -------
    dict
        Keys ``'Value'`` (the draw), ``'Mean'`` and ``'Cov'``.
    """
    prior_mean = alpha_mean_vec
    prior_cov = make_spd_matrix(n_dim=2)  # the covariance matrix must be SPD
    return {
        'Value': rand.multivariate_normal(mean=prior_mean, cov=prior_cov),
        'Mean': prior_mean,
        'Cov': prior_cov,
    }
def make_covar_matrix(covariance_type, n_components, n_features,
                      random_state=None):
    """Create random covariance parameters for the given covariance type.

    Parameters
    ----------
    covariance_type : {'spherical', 'tied', 'diag', 'full'}
        Layout of the returned array.
    n_components : int
        Number of components.
    n_features : int
        Number of features.
    random_state : None, int or RandomState
        Passed through ``check_random_state``; every branch draws from the
        resulting generator.

    Returns
    -------
    ndarray or None
        'spherical': shape (n_components,); 'diag': shape
        (n_components, n_features); 'tied': shape (n_features, n_features);
        'full': shape (n_components, n_features, n_features).  Any other
        ``covariance_type`` returns None.
    """
    mincv = 0.1
    prng = check_random_state(random_state)
    if covariance_type == 'spherical':
        return (mincv + mincv * prng.random_sample((n_components, )))**2
    elif covariance_type == 'tied':
        # Use prng here too: without it this branch ignored ``random_state``
        # and was not reproducible.
        return (make_spd_matrix(n_features, random_state=prng)
                + mincv * np.eye(n_features))
    elif covariance_type == 'diag':
        return (mincv + mincv * prng.random_sample(
            (n_components, n_features)))**2
    elif covariance_type == 'full':
        return np.array([(make_spd_matrix(n_features, random_state=prng) +
                          mincv * np.eye(n_features))
                         for _ in range(n_components)])
    # Unknown covariance types fall through and return None, as before.
    return None
def normal_data_with_cov(nrObservations, nrFeatures, seed):
    """Return zero-mean normal data with a random SPD covariance.

    The global NumPy generator is seeded with ``seed`` before the covariance
    and the samples are drawn.  The result has ``nrObservations`` rows and
    ``nrFeatures`` columns.
    """
    np.random.seed(seed)
    covariance = dts.make_spd_matrix(nrFeatures)
    return np.random.multivariate_normal(
        mean=np.zeros(nrFeatures),
        cov=covariance,
        size=nrObservations,
    )
def main():
    """Sample a 2-D Gaussian mixture, then time ``EM_algo`` on it.

    800 points are drawn from the three components returned by
    ``generateZ`` using random mixture weights.  Random initial means,
    covariances and weights are then passed to ``EM_algo`` and the elapsed
    time is printed.
    """
    N = 800
    d = 2
    np.random.seed(20)
    k = 3

    # True mixture weights and components
    w = np.random.rand(k)
    w = w / np.sum(w)
    print(w)
    Z = generateZ(d, k)
    index = list(range(k))
    print(Z)

    X = np.zeros(N)
    Y = np.zeros(N)
    for j in range(N):
        z_idx = np.random.choice(index, p=w)
        mu_idx, sigma_idx = Z[z_idx]
        X[j], Y[j] = np.random.multivariate_normal(mu_idx,
                                                   sigma_idx,
                                                   check_valid='warn')
    Xn = np.column_stack((X, Y))
    plt.scatter(X, Y)

    # Random starting point for EM
    Mu = np.random.random((k, d))
    print(Mu)
    Cov = np.array([make_spd_matrix(d, random_state=i + 10)
                    for i in range(k)])
    print(Cov)
    w = np.random.random(k)
    w = w / np.sum(w)
    print(w)

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed time.
    start_time = time.perf_counter()
    EM_algo(Xn, k, Mu, Cov, w, d, N)
    end_time = time.perf_counter()
    print(end_time - start_time, "s")
def gen_data(dim, N_size):
    """Draw zero-mean normal samples with a fixed random covariance.

    Parameters
    ----------
    dim : int
        Dimension of the samples.
    N_size : int
        Number of samples.

    Returns
    -------
    X : ndarray, shape (N_size, dim)
        The samples.
    C : ndarray, shape (dim, dim)
        The covariance used: ``make_spd_matrix(dim, random_state=123)`` plus
        ``1e-6`` on the diagonal.
    """
    from sklearn.datasets import make_spd_matrix

    C = make_spd_matrix(dim, random_state=123) + 0.000001 * np.identity(dim)
    # ``xrange`` does not exist on Python 3; a zero vector needs no loop.
    mean = np.zeros(dim)
    X = np.random.multivariate_normal(mean, C, N_size)
    return X, C
def make_spd_matrices_4d(num_samples, num_data_points, num_dimensions,
                         random_state):
    """Fill a 4-D array with random SPD matrices.

    Parameters
    ----------
    num_samples, num_data_points : int
        Sizes of the two leading axes.
    num_dimensions : int
        Side length of each matrix.
    random_state : None, int or RandomState
        Forwarded to ``make_spd_matrix``.  An integer seed is converted once
        into a ``numpy.random.RandomState`` shared by all calls.

    Returns
    -------
    ndarray, shape (num_samples, num_data_points, num_dimensions, num_dimensions)
    """
    # Passing the same integer seed to every make_spd_matrix call would
    # re-seed each time and return identical matrices.  With one shared
    # generator the first matrix is unchanged and the rest differ.
    if isinstance(random_state, (int, np.integer)):
        random_state = np.random.RandomState(random_state)

    matrices = np.zeros(
        (num_samples, num_data_points, num_dimensions, num_dimensions))
    for i, j in np.ndindex(num_samples, num_data_points):
        matrices[i, j, :, :] = make_spd_matrix(num_dimensions,
                                               random_state=random_state)
    return matrices
def make_lqr(state_size, action_size):
    """Build an ``LQR`` from randomly generated coefficients.

    With ``n_dim = state_size + action_size`` the constructor receives, in
    order: a normal (state_size, n_dim) matrix, a normal (state_size, 1)
    vector, a random (n_dim, n_dim) SPD matrix and a normal (n_dim, 1)
    vector.
    """
    n_dim = state_size + action_size
    # Draw order matters for reproducibility under a fixed global seed.
    F_matrix = np.random.normal(size=(state_size, n_dim))
    f_vector = np.random.normal(size=(state_size, 1))
    C_matrix = make_spd_matrix(n_dim)
    c_vector = np.random.normal(size=(n_dim, 1))
    return LQR(F_matrix, f_vector, C_matrix, c_vector)
def test_make_spd_matrix():
    """make_spd_matrix must return a square, symmetric matrix with positive eigenvalues."""
    from numpy.linalg import eig

    n_dim = 5
    X = make_spd_matrix(n_dim=n_dim, random_state=0)

    assert_equal(X.shape, (n_dim, n_dim), "X shape mismatch")
    assert_array_almost_equal(X, X.T)

    spectrum = eig(X)[0]
    all_positive = np.array([True] * n_dim)
    assert_array_equal(spectrum > 0, all_positive,
                       "X is not positive-definite")
def test_one_class(self):
    """With a single class NCA must leave its initial matrix untouched."""
    # if there is only one class the gradient is null, so the final matrix
    # must stay like the initialization
    single_class = self.iris_labels == 0
    X = self.iris_points[single_class]
    y = self.iris_labels[single_class]
    n_features = X.shape[1]
    # The second positional argument was random_state; pass it by keyword
    # because newer scikit-learn releases make it keyword-only.
    A = make_spd_matrix(n_dim=n_features, random_state=n_features)
    nca = NCA(init=A, max_iter=30, n_components=n_features)
    nca.fit(X, y)
    assert_array_equal(nca.components_, A)
def make_covar_matrix(covariance_type, n_states, n_features, random_state=None):
    """Create random covariance parameters for the given covariance type.

    Parameters
    ----------
    covariance_type : {"spherical", "tied", "diagonal", "full"}
        Layout of the returned array.
    n_states : int
        Number of states.
    n_features : int
        Number of features.
    random_state : None, int or RandomState
        Passed through ``check_random_state``; every branch draws from the
        resulting generator.

    Returns
    -------
    ndarray or None
        "spherical": shape (n_states,); "diagonal": shape
        (n_states, n_features); "tied": shape (n_features, n_features);
        "full": shape (n_states, n_features, n_features).  Any other
        ``covariance_type`` returns None.
    """
    mincv = 0.1
    prng = check_random_state(random_state)
    if covariance_type == "spherical":
        return (mincv + mincv * prng.random_sample((n_states,))) ** 2
    elif covariance_type == "tied":
        # Use prng here too: without it this branch ignored ``random_state``
        # and was not reproducible.
        return (
            make_spd_matrix(n_features, random_state=prng)
            + mincv * np.eye(n_features)
        )
    elif covariance_type == "diagonal":
        return (mincv + mincv * prng.random_sample((n_states, n_features))) ** 2
    elif covariance_type == "full":
        return np.array(
            [
                (
                    make_spd_matrix(n_features, random_state=prng)
                    + mincv * np.eye(n_features)
                )
                for _ in range(n_states)
            ]
        )
    # Unknown covariance types fall through and return None, as before.
    return None
def __init__(self, data, clusters, color="test", max_itr=400, eps=1e-8):
    """Store the training data and randomly initialise the mixture.

    Parameters
    ----------
    data : ndarray
        Training data; its last axis gives the feature dimension.
    clusters : int
        Number of mixture components.
    color : str
        Converted with ``self.color2pixel`` and stored as ``self.color``.
    max_itr : int
        Stored as ``self.max_itr``.
    eps : float
        Stored as ``self.eps``.
    """
    self.train_data = data
    self.clusters = clusters
    self.color = self.color2pixel(color)
    self.max_itr = max_itr
    self.eps = eps

    n_features = data.shape[-1]
    # Equal weights, means sampled from the raw data values, and one random
    # SPD covariance per component.
    self.weights = np.ones(self.clusters) / self.clusters
    self.means = np.random.choice(data.flatten(),
                                  (self.clusters, n_features))
    component_covs = []
    for _ in range(self.clusters):
        component_covs.append(make_spd_matrix(n_features))
    self.cov = np.array(component_covs)
def generateZ(d, k):
    """Return ``k`` (mean, covariance) pairs for Gaussian components.

    The means are the first ``k`` rows of a fixed 3x2 table, so ``k`` may be
    at most 3.  Covariance ``i`` is ``make_spd_matrix(d, random_state=i + 10)``.
    """
    mu = np.array([[0.3, 4], [5.5, 0.25], [7, 7]])
    covariances = [make_spd_matrix(d, random_state=10 + i) for i in range(k)]
    return [(mu[i], cov) for i, cov in enumerate(covariances)]
def sim_Unobs_Data(xDim, nSim, p_AgivenZ, p_AgivenNotZ):
    """Simulate data with an unobserved confounder U and an encouragement Z.

    Y = X * beta + U * gamma + (treatment effect when A == 1), where U also
    enters Y but is not returned.

    Parameters
    ----------
    xDim : int
        Number of observed confounders (column 1 of X is read, so >= 2).
    nSim : int
        Number of simulated units.
    p_AgivenZ, p_AgivenNotZ : float
        Probability of A == 1 when Z == 1 and when Z == 0 respectively.

    Returns
    -------
    tuple
        ``(X, Y, A, nSim, Group, Y_0, Y_1, Z, A)`` - ``A`` appears twice.
    """
    # confounders
    x_Sigma = make_spd_matrix(xDim)
    X = np.random.multivariate_normal(np.zeros(xDim), x_Sigma, size=nSim)
    beta = np.random.choice(5, xDim, replace=True, p=[.3, .25, .2, .15, .1])
    U = np.random.normal(0.5 * np.ones(nSim), 1)
    gamma = 2

    # ground truth CATE, indexed by group
    CATE = np.array([1, 2, 3, 4])

    # Group is 0..3 according to how many of the thresholds p_G falls below.
    p_G = np.exp(X[:, 1]) / (1 + np.exp(X[:, 1]))
    Group = ((p_G < 0.75).astype(int)
             + (p_G < 0.5).astype(int)
             + (p_G < 0.25).astype(int))

    # Z encourage A, and split-treatment criterion p_A_given_Z_X > p_A_given_notZ_X
    p_A_given_Z = [p_AgivenZ] * len(X)
    p_A_given_notZ = [p_AgivenNotZ] * len(X)
    Compliance = np.array(p_A_given_Z) - np.array(p_A_given_notZ)
    print('Avg compliance:', np.mean(Compliance))

    # randomized encouragement, then one treatment draw per unit
    Z = np.random.choice(2, nSim)
    A = []
    for i in range(nSim):
        p_treated = p_A_given_Z[i] if Z[i] == 1 else p_A_given_notZ[i]
        A.append(np.random.choice(2, 1, p=[1 - p_treated, p_treated]))

    # ground truth two-arm potential outcomes
    Y_0 = np.random.normal(np.sum(X * beta, 1) + U * gamma, 1)
    Y_1 = Y_0 + CATE[Group] / (Compliance * 0.5)

    Z = np.array(Z).ravel()
    A = np.array(A).ravel()
    # observed outcome: Y_0 for untreated units, Y_1 otherwise
    Y = np.where(A == 0, Y_0, Y_1)

    # return full observed data
    return X, Y, A, nSim, Group, Y_0, Y_1, Z, A
def generate_tree(self, p, dim=50):
    """Generate a tree where each tree is a leaf node with probability p.

    A root ``Node`` with a random ``dim`` x ``dim`` SPD covariance is
    created, ``self.scale`` / ``self.df`` / ``self.root`` / ``self.n_nodes``
    are set, and the tree is grown with ``self.generate_from_node``.

    Parameters
    ----------
    p : float
        Leaf probability forwarded to ``generate_from_node``.
    dim : int
        Side length of the root covariance matrix, default 50.
    """
    cov_mat = make_spd_matrix(dim)
    root = Node(cov_mat=cov_mat, index=0)
    # Set the scale (of the inverse chi squared, per the original note) to
    # 1/100 of the lowest eigenvalue.  eigvalsh returns the eigenvalues of
    # the symmetric matrix in ascending order; eigvals gives no ordering
    # guarantee, so its last element was not necessarily the lowest.
    lowest_eigenvalue = np.linalg.eigvalsh(cov_mat)[0]
    self.scale = lowest_eigenvalue / 100
    self.df = 4
    self.root = root
    self.n_nodes = 1
    self.generate_from_node(root, p)
def laplacian_test():
    """Time ``create_laplacian`` twice on a random sparse SPD matrix and compare.

    The two resulting Laplacians are drawn side by side with ``spy`` and
    their stored values must agree within a relative tolerance of 0.1.

    NOTE(review): the loop iterates over two method names but never passes
    the name to ``create_laplacian``, so both runs appear to use the same
    implementation - confirm the intended call.
    """
    from sklearn.datasets import make_sparse_spd_matrix, make_spd_matrix
    from sklearn.preprocessing import MinMaxScaler
    from scipy.sparse import csr_matrix, linalg
    import time as time
    import matplotlib.pyplot as plt
    plt.style.use('ggplot')

    A = csr_matrix(make_spd_matrix(100))

    L, D, t = [], [], []
    for _method in ['personal', 'sklearn']:
        started = time.time()
        laplacian, degree = create_laplacian(A)
        elapsed = time.time() - started
        L.append(laplacian)
        D.append(degree)
        t.append(elapsed)

    fig, ax = plt.subplots(nrows=1, ncols=2)
    title_templates = ('My Method; {t:.2e} secs', 'Sklearn; {t:.2e} secs')
    for axis, matrix, template, seconds in zip(ax, L, title_templates, t):
        axis.spy(matrix, precision=1E-10, markersize=.2)
        axis.set_title(template.format(t=seconds))
    plt.show()

    print(np.shape(L[0]), np.shape(L[1]))

    tol = 1E-1
    print('Different between the Laplacian Matrix' \
          'values close with tol: {tol}?'.format(tol=tol))
    values_match = np.allclose(L[0].data, L[1].data, rtol=tol)
    assert (values_match), "False Laplacians not" \
                           " the same."
    print('Test passed.')
iris_df.fillna(iris_df.max())['sepal length (cm)'].head(5)
#0 5.1
#1 4.9
#2 4.7
#3 7.9
#4 5.0
#Name: sepal length (cm), dtype: float64

# Using Pipelines for multiple preprocessing steps
from sklearn import datasets
import numpy as np
mat = datasets.make_spd_matrix(10)
# Randomly mask about 10% of the entries to simulate missing data
masking_array = np.random.binomial(1, .1, mat.shape).astype(bool)
mat[masking_array] = np.nan
mat[:4, :4]
#array([[ 0.56716186, -0.20344151, nan, -0.22579163],
#[ nan, 1.98881836, -2.25445983, 1.27024191],
#[ 0.29327486, -2.25445983, 3.15525425, -1.64685403],
#[-0.22579163, 1.27024191, -1.64685403, 1.32240835]])

# Without a pipeline
from sklearn import preprocessing
# preprocessing.Imputer was removed from scikit-learn; SimpleImputer is its
# replacement and also defaults to mean imputation of NaN entries.
from sklearn.impute import SimpleImputer
impute = SimpleImputer()
scaler = preprocessing.StandardScaler()
mat_imputed = impute.fit_transform(mat)
mat_imputed[:4, :4]