def sample():
    """Build an SPN and print samples for a partially and a fully unobserved query.

    np.nan entries mark the variables to be sampled; observed entries are
    kept as evidence.
    """
    spn = create_SPN()

    import numpy as np
    from numpy.random.mtrand import RandomState
    from spn.algorithms.Sampling import sample_instances

    # Five rows: first column unobserved, the other two fixed to 0.
    partial_query = np.array([np.nan, 0, 0] * 5).reshape(-1, 3)
    print(sample_instances(spn, partial_query, RandomState(123)))

    # Five rows: every variable unobserved (joint sampling).
    joint_query = np.array([np.nan, np.nan, np.nan] * 5).reshape(-1, 3)
    print(sample_instances(spn, joint_query, RandomState(123)))
def test_leaf_sampling_multilabel(self):
    """Sampling two Bernoulli labels conditioned on X must recover the
    deterministic label pattern the training data encodes."""
    np.random.seed(17)

    # Two well-separated Gaussian clusters of 5000 points each.
    features = np.concatenate(
        (
            np.random.multivariate_normal([10, 10], np.eye(2), 5000),
            np.random.multivariate_normal([1, 1], np.eye(2), 5000),
        ),
        axis=0,
    )
    # Two complementary binary label columns:
    #   y0 = 0 and y1 = 1 on the X=[10,10] cluster,
    #   y0 = 1 and y1 = 0 on the X=[1,1] cluster.
    labels = np.concatenate(
        (
            np.array([0] * 5000 + [1] * 5000).reshape(-1, 1),
            np.array([1] * 5000 + [0] * 5000).reshape(-1, 1),
        ),
        axis=1,
    )
    data = concatenate_yx(labels, features)

    classifier = CSPNClassifier(
        [Bernoulli] * labels.shape[1], min_instances_slice=4990, cluster_univariate=True
    )
    classifier.fit(features, labels)

    # Condition on the [10,10] cluster: expect y0=0, y1=1 everywhere.
    query = np.array([np.nan, np.nan, 10, 10] * 1000).reshape(-1, 4)
    samples = sample_instances(classifier.cspn, query, 17)
    self.assertAlmostEqual(np.unique(samples[:, 0]), 0)
    self.assertAlmostEqual(np.unique(samples[:, 1]), 1)

    # Condition on the [1,1] cluster: expect the flipped labels.
    query = np.array([np.nan, np.nan, 1, 1] * 1000).reshape(-1, 4)
    samples = sample_instances(classifier.cspn, query, 17)
    self.assertAlmostEqual(np.unique(samples[:, 0]), 1)
    self.assertAlmostEqual(np.unique(samples[:, 1]), 0)

    # Interleaved rows with one label observed and one to sample.
    query = np.array([np.nan, 0, 1, 1, np.nan, 1, 10, 10] * 1000).reshape(-1, 4)
    samples = sample_instances(classifier.cspn, query, 17)
    self.assertAlmostEqual(np.unique(samples[::2, 0]), 1)
    self.assertAlmostEqual(np.unique(samples[1::2, 0]), 0)
    self.assertAlmostEqual(np.unique(samples[::2, 1]), 0)
    self.assertAlmostEqual(np.unique(samples[1::2, 1]), 1)

    # A fully observed row (no np.nan) is an invalid sampling query.
    with self.assertRaises(AssertionError):
        bad_query = np.array(
            [np.nan, 1, 1, 1, np.nan, 0, 10, 10, 1, 1, 10, 10]
        ).reshape(-1, 4)
        sample_instances(classifier.cspn, bad_query, 17)
def test_histogram_samples(self):
    """Learn an MSPN on mixed discrete/real data and print the range of
    unconditional samples against the learned domains."""
    import numpy as np
    from numpy.random.mtrand import RandomState
    from spn.algorithms.Sampling import sample_instances
    from spn.structure.Base import Context
    from spn.structure.StatisticalTypes import MetaType
    from spn.algorithms.LearningWrappers import learn_mspn

    np.random.seed(123)

    # Synthetic dataset: two discrete columns, one Gaussian-mixture column,
    # and a fourth column that is a linear combination of the first three.
    col_a = np.random.randint(2, size=10000).reshape(-1, 1)
    col_b = np.random.randint(3, size=10000).reshape(-1, 1)
    col_c = np.r_[np.random.normal(10, 5, (3000, 1)),
                  np.random.normal(20, 10, (7000, 1))]
    col_d = 5 * col_a + 3 * col_b + col_c
    train_data = np.c_[col_a, col_b, col_c, col_d]

    ds_context = Context(
        meta_types=[MetaType.DISCRETE, MetaType.DISCRETE, MetaType.REAL, MetaType.REAL]
    ).add_domains(train_data)

    mspn = learn_mspn(train_data, ds_context, min_instances_slice=200)

    # 100 fully unobserved rows: sample every variable jointly.
    query = np.array([np.nan, np.nan, np.nan, np.nan] * 100).reshape(-1, 4)
    samples = sample_instances(mspn, query, RandomState(123))

    print(np.max(samples, axis=0), np.min(samples, axis=0))
    print(ds_context.domains)
def _sample(self, n=1, random_state=None):
    """Draw ``n`` samples from the wrapped SPN under ``self._condition``.

    Args:
        n: number of sample rows to draw (default 1).
        random_state: numpy RandomState to use. Defaults to a fresh
            ``RandomState(123)`` created per call. (The previous default
            ``random_state=RandomState(123)`` was evaluated once at function
            definition time, so every default call silently shared and
            advanced the same generator object — the mutable-default pitfall.)

    Returns:
        A list of rows, each converted from numeric values back to names
        via ``self._numeric_to_names``.
    """
    if random_state is None:
        random_state = RandomState(123)
    # Tile the single condition row n times so every sample shares the evidence.
    placeholder = np.repeat(np.array(self._condition), n, axis=0)
    s = sample_instances(self._spn, placeholder, random_state)
    # Re-order the sampled columns into the user-facing variable order.
    indices = [self._initial_names_to_index[name] for name in self.names]
    result = s[:, indices]
    result = [self._numeric_to_names(row) for row in result.tolist()]
    return result
def test_correct_parameters(self):
    """sample_instances must reject SPNs whose node ids are inconsistent."""
    # Build a small mixture: spn = 0.1 * leaf([0,1]) + 0.9 * (leaf(1) * leaf(0)).
    node_1_2_2 = Leaf(0)
    node_1_2_1 = Leaf(1)
    node_1_1 = Leaf([0, 1])
    node_1_2 = node_1_2_1 * node_1_2_2
    spn = 0.1 * node_1_1 + 0.9 * node_1_2

    # Corrupt the ids: duplicate id 0 on an internal node.
    node_1_2.id = 0

    rng = RandomState(1234)
    with self.assertRaises(AssertionError):
        sample_instances(spn, rng.rand(10, 3), rng)

    # Repair the ids, then corrupt a leaf id instead — must fail again.
    assign_ids(spn)
    node_1_2_2.id += 1
    with self.assertRaises(AssertionError):
        sample_instances(spn, rng.rand(10, 3), rng)
def test_leaf_sampling(self):
    """A conditional Gaussian leaf must sample y near the mean associated
    with each conditioning X cluster."""
    np.random.seed(17)

    # Two clusters: X=[10,10] paired with y~N(20,2), X=[1,1] with y~N(60,2).
    features = np.concatenate(
        (
            np.random.multivariate_normal([10, 10], np.eye(2), 5000),
            np.random.multivariate_normal([1, 1], np.eye(2), 5000),
        ),
        axis=0,
    )
    targets = np.array(
        np.random.normal(20, 2, 5000).tolist()
        + np.random.normal(60, 2, 5000).tolist()
    ).reshape(-1, 1)
    data = concatenate_yx(targets, features)

    ds_context = Context(parametric_types=[Gaussian])
    ds_context.feature_size = 2
    leaf = create_conditional_leaf(data, ds_context, [0])

    # NOTE(review): the seed is passed as a plain int (17) rather than a
    # RandomState, unlike the categorical variant of this test — the
    # hard-coded expected means below depend on it, so it is kept as-is.
    samples = sample_instances(
        leaf, np.array([np.nan, 10, 10] * 1000).reshape(-1, 3), 17
    )
    self.assertAlmostEqual(np.mean(samples[:, 0]), 20.456669723751173)

    samples = sample_instances(
        leaf, np.array([np.nan, 1, 1] * 1000).reshape(-1, 3), 17
    )
    self.assertAlmostEqual(np.mean(samples[:, 0]), 59.496663076099196)

    # Alternating conditioning rows: even rows condition on [1,1],
    # odd rows on [10,10].
    samples = sample_instances(
        leaf, np.array([np.nan, 1, 1, np.nan, 10, 10] * 1000).reshape(-1, 3), 17
    )
    self.assertAlmostEqual(np.mean(samples[::2, 0]), 59.546359637084564)
    self.assertAlmostEqual(np.mean(samples[1::2, 0]), 20.452118792501008)

    # A fully observed row (y=5 given) is an invalid sampling query.
    with self.assertRaises(AssertionError):
        sample_instances(
            leaf,
            np.array([np.nan, 1, 1, np.nan, 10, 10, 5, 10, 10]).reshape(-1, 3),
            17,
        )
def test_induced_trees(self):
    """Sampling must route each row to the mixture component selected by
    its observed categorical value (category 0 -> mean 10, 1 -> mean 50).

    BUG FIX: the original asserted on the input array ``data`` after calling
    ``sample_instances(spn, data, rand_gen)``, but sample_instances returns a
    new array and only fills the input when called with ``in_place=True``
    (see its usage elsewhere in this file) — so the assertions were checking
    the untouched nan-filled input. Assert on the returned samples instead.
    """
    add_parametric_inference_support()
    add_parametric_sampling_support()

    # Near-degenerate Gaussians make the expected sample values deterministic.
    spn = 0.5 * (Gaussian(mean=10, stdev=0.000000001, scope=0) * Categorical(p=[1.0, 0], scope=1)) + \
          0.5 * (Gaussian(mean=50, stdev=0.000000001, scope=0) * Categorical(p=[0, 1.0], scope=1))

    rand_gen = np.random.RandomState(17)

    # Row 0 observes category 0, row 1 observes category 1; column 0 is sampled.
    data = np.zeros((2, 2))
    data[1, 1] = 1
    data[:, 0] = np.nan

    samples = sample_instances(spn, data, rand_gen)
    self.assertAlmostEqual(samples[0, 0], 10)
    self.assertAlmostEqual(samples[1, 0], 50)
def test_leaf_sampling_categorical(self):
    """A conditional Categorical leaf must sample the class associated with
    each conditioning X cluster (checked to 1 decimal place)."""
    np.random.seed(17)

    # Three clusters, 500 points each, labelled 2 / 1 / 0 respectively.
    features = np.concatenate(
        (
            np.random.multivariate_normal([20, 20], np.eye(2), 500),
            np.random.multivariate_normal([10, 10], np.eye(2), 500),
            np.random.multivariate_normal([1, 1], np.eye(2), 500),
        ),
        axis=0,
    )
    labels = np.array([2] * 500 + [1] * 500 + [0] * 500).reshape(-1, 1)
    data = concatenate_yx(labels, features)

    ds_context = Context(parametric_types=[Categorical])
    ds_context.feature_size = 2
    leaf = create_conditional_leaf(data, ds_context, [0])

    # Conditioning on [10,10] should yield class 1 on average.
    samples = sample_instances(
        leaf, np.array([np.nan, 10, 10] * 1000).reshape(-1, 3), RandomState(17)
    )
    self.assertAlmostEqual(np.mean(samples[:, 0]), 1, 1)

    # Conditioning on [1,1] should yield class 0 on average.
    samples = sample_instances(
        leaf, np.array([np.nan, 1, 1] * 1000).reshape(-1, 3), RandomState(17)
    )
    self.assertAlmostEqual(np.mean(samples[:, 0]), 0, 1)

    # Alternating rows: even rows condition on [1,1], odd rows on [10,10].
    samples = sample_instances(
        leaf,
        np.array([np.nan, 1, 1, np.nan, 10, 10] * 1000).reshape(-1, 3),
        RandomState(17),
    )
    self.assertAlmostEqual(np.mean(samples[::2, 0]), 0, 1)
    self.assertAlmostEqual(np.mean(samples[1::2, 0]), 1, 1)

    # A fully observed row (class 5 given) is an invalid sampling query.
    with self.assertRaises(AssertionError):
        sample_instances(
            leaf,
            np.array([np.nan, 1, 1, np.nan, 10, 10, 5, 10, 10]).reshape(-1, 3),
            RandomState(17),
        )
def test_induced_trees(self):
    """Each row's observed category must steer sampling of column 0 to the
    matching mixture component (category 0 -> 10, category 1 -> 50)."""
    # Near-degenerate Gaussians make the sampled values effectively exact.
    component_a = Gaussian(mean=10, stdev=0.000000001, scope=0) * Categorical(p=[1.0, 0], scope=1)
    component_b = Gaussian(mean=50, stdev=0.000000001, scope=0) * Categorical(p=[0, 1.0], scope=1)
    spn = 0.5 * component_a + 0.5 * component_b

    rand_gen = np.random.RandomState(17)

    # Row 0 observes category 0, row 1 observes category 1; column 0 is nan
    # (to be sampled).
    data = np.zeros((2, 2))
    data[1, 1] = 1
    data[:, 0] = np.nan

    samples = sample_instances(spn, data, rand_gen)
    self.assertAlmostEqual(samples[0, 0], 10)
    self.assertAlmostEqual(samples[1, 0], 50)
def _sample(self, n=1, random_state=None):
    """Draw ``n`` samples from the wrapped SPN under ``self._condition`` and
    map categorical columns back to their value names.

    Args:
        n: number of sample rows to draw (default 1).
        random_state: numpy RandomState to use. Defaults to a fresh
            ``RandomState(123)`` created per call. (The previous default
            ``random_state=RandomState(123)`` was evaluated once at function
            definition time, so every default call silently shared and
            advanced the same generator object — the mutable-default pitfall.)

    Returns:
        A list of rows; categorical entries are replaced by their string
        names via the variable's ``int_to_name`` mapping, other entries are
        left numeric.
    """
    if random_state is None:
        random_state = RandomState(123)
    # Tile the single condition row n times so every sample shares the evidence.
    placeholder = np.repeat(np.array(self._condition), n, axis=0)
    s = sample_instances(self._spn, placeholder, random_state)
    # Re-order the sampled columns into the user-facing variable order.
    indices = [self._initial_names_to_index[name] for name in self.names]
    result = s[:, indices].tolist()

    # performance shortcuts
    names = self.names
    cat_vars = self._categorical_variables

    # Convert sampled integers back to categorical value names.
    for row in result:
        for i, name in enumerate(names):
            if name in cat_vars:
                row[i] = cat_vars[name]['int_to_name'][round(row[i])]
    return result
pickle.dump(cspn, fileObject) fileObject.close() from spn.structure.leaves.conditional.Sampling import add_conditional_sampling_support add_conditional_inference_support() add_conditional_sampling_support() from numpy.random.mtrand import RandomState from spn.algorithms.Sampling import sample_instances num_samples = 30 num_half_image_pixels = downscaleto * downscaleto // 4 # block_samples_spn = sample_instances(spn, np.array([[np.nan] * num_half_image_pixels] * num_samples).reshape(-1, num_half_image_pixels), RandomState(123)) annotation_spn = sample_instances( spn, np.array([[np.nan] * 10] * num_samples).reshape(-1, 10), RandomState(123)) # sample 1st block samples_placholder = np.concatenate( (np.array([[np.nan] * num_half_image_pixels] * num_samples).reshape( -1, num_half_image_pixels), annotation_spn), axis=1, ) block_samples_spn = sample_instances(cspn_1st, samples_placholder, RandomState(123)) final_samples = np.zeros((num_samples, downscaleto, downscaleto)) final_samples_block = [ final_samples[:, :horizontal_middle, :vertical_middle], final_samples[:, :horizontal_middle, vertical_middle:],
sample_query_blocks = np.zeros_like(tr_block[0:num_samples, :].reshape(num_samples, -1)) else: # i+1 time: we set the previous mpe values as evidence mpe_query_blocks = np.zeros_like(np.array(tr_block[0:num_mpes, :].reshape(num_mpes, -1))) mpe_query_blocks[:, -(mpe_result.shape[1]) :] = mpe_result sample_query_blocks = np.zeros_like(np.array(tr_block[0:num_samples, :].reshape(num_samples, -1))) sample_query_blocks[:, -(sample_result.shape[1]) :] = sample_result cspn_mpe_query = set_sub_block_nans(mpe_query_blocks, inp=block_idx, nans=block_idx[0:conditional_blocks]) mpe_result = mpe(cspn, cspn_mpe_query) mpe_img_blocks = stitch_imgs( mpe_result.shape[0], img_size=img_size, num_blocks=num_blocks, blocks={tuple(block_idx): mpe_result} ) cspn_sample_query = set_sub_block_nans(sample_query_blocks, inp=block_idx, nans=block_idx[0:conditional_blocks]) sample_result = sample_instances(cspn, cspn_sample_query, RandomState(123)) sample_img_blocks = stitch_imgs( sample_result.shape[0], img_size=img_size, num_blocks=num_blocks, blocks={tuple(block_idx): sample_result} ) for j in range(num_mpes): mpe_fname = output_path + "mpe_%s_%s.png" % ("-".join(map(str, block_idx)), j) save_img(mpe_img_blocks[j], mpe_fname) for j in range(num_samples): sample_fname = output_path + "sample_%s_%s.png" % ("-".join(map(str, block_idx)), j) save_img(sample_img_blocks[j], sample_fname)
# for t, c in get_structure_stats_dict(cspn)['count_per_type'].items(): # print(t, c) # 0/0 num_images = 40 sample_images = [] rng = RandomState(17) for i, (tr_block, _) in enumerate(datasets): spn = cspns[i] if i == 0: y = np.zeros((num_images, 1)) y[:] = np.nan sample_instances(spn, y, rng, in_place=True) y[:] = 0 data = np.zeros_like(to_ohe(y[:, 0].astype(int), n_people)) data = np.eye(n_people) # data[:, 9] = 1 # data[:, 11] = 1 # data[:] = 1 sample_images.insert(0, data) else: y = np.zeros((num_images, block_size)) y[:] = np.nan X = np.concatenate(sample_images, axis=1)
# start sampling from spn.structure.leaves.conditional.Sampling import add_conditional_sampling_support add_conditional_inference_support() add_conditional_sampling_support() from numpy.random.mtrand import RandomState from spn.algorithms.Sampling import sample_instances num_samples = 30 num_half_image_pixels = downscaleto * downscaleto // 4 samples_placeholder = np.array([[np.nan] * num_half_image_pixels] * num_samples).reshape( -1, num_half_image_pixels) top_left_samples = sample_instances(spn, samples_placeholder, RandomState(123)) samples_placeholder = np.concatenate( (np.array([[np.nan] * num_half_image_pixels] * top_left_samples.shape[0]), top_left_samples), axis=1) sample_images = sample_instances(cspn, samples_placeholder, RandomState(123)) top_right_samples = sample_images[:, :num_half_image_pixels] # tmp = np.zeros((num_samples, 8, 8)) # tmp[:, :4, :4] = top_left_samples.reshape(num_samples, 4, 4) # tmp[:, :4, 4:] = top_right_samples.reshape(num_samples, 4, 4) final_samples = np.zeros((num_samples, downscaleto, downscaleto)) final_samples[:, :horizontal_middle, :
cspn, spn = pickle.load(f) def conditional_input_to_LR(input_images_in_rl): # format L|R images_to_lr = np.concatenate( (input_images_in_rl[:, input_images_in_rl.shape[1] // 2:].reshape(input_images_in_rl.shape[0], px, -1), input_images_in_rl[:, :input_images_in_rl.shape[1] // 2].reshape(input_images_in_rl.shape[0], px, -1)), axis=2).reshape( input_images_in_rl.shape[0], -1) return images_to_lr spn_input = np.zeros_like(right).reshape(px, -1) / 0 sample_left = sample_instances(spn, spn_input, RandomState(123)) sample_input = np.concatenate((np.zeros_like(right).reshape(px, -1) / 0, sample_left), axis=1) sample_plot = conditional_input_to_LR(sample_input) for r in range(sample_plot.shape[0]): plot_img(sample_plot[r], px, py) sample_images = sample_instances(cspn, sample_input, RandomState(123)) sample_plot = conditional_input_to_LR(sample_images) for r in range(sample_plot.shape[0]): plot_img(sample_plot[r], px, py)
ds_context.add_domains(dataOut) ds_context.parametric_types = [Conditional_Poisson] * dataOut.shape[1] scope = list(range(dataOut.shape[1])) cspn = learn_conditional(np.concatenate((dataOut, dataIn), axis=1), ds_context, scope, min_instances_slice=60000000) # spn.scope.extend(branch.scope) print(cspn) plot_spn(cspn, "basicspn.png") fileObject = open(cspn_file, "wb") pickle.dump(cspn, fileObject) fileObject.close() from numpy.random.mtrand import RandomState from spn.algorithms.Sampling import sample_instances from spn.structure.leaves.conditional.Sampling import add_conditional_sampling_support add_conditional_inference_support() add_conditional_sampling_support() sample_data = np.concatenate( (np.array([[np.nan] * 50] * dataOut.shape[0]).reshape(-1, 50), dataIn), axis=1) print(sample_instances(cspn, sample_data, RandomState(123)))
spn = learn_rand_spn(data, ds_context, min_instances_slice=500, row_a=2, row_b=5, col_a=2, col_b=5, col_threshold=0.3, memory=None, rand_gen=rand_gen) add_parametric_text_support() print(spn_to_str_equation(spn)) print(spn.scope) # # sampling again X, _Z, P = sample_instances(spn, D, N, rand_gen, return_Zs=True, return_partition=True, dtype=np.float64) # # visualizing stats = get_structure_stats_dict(spn) inv_leaf_map = {l.id: spn_to_str_equation(l) # l.__class__.__name__ for l in get_nodes_by_type(spn, Leaf)} title_str = "{} samples from spn with {} sums {} prods {} leaves".format(N, stats['sum'], stats['prod'], stats['leaf']) visualize_data_partition(P, color_map_ids=inv_leaf_map, title=title_str) # # ordering partitions reord_ids = reorder_data_partitions(P) title_str = "ordered {} samples from spn with {} sums {} prods {} leaves".format(N,