def test_Piecewise_expectations_with_evidence(self):
    # Two clusters that differ in the mean of feature 0; feature 1 marks the cluster.
    adata = np.zeros((20000, 2))
    adata[:, 1] = 0
    adata[:, 0] = np.random.normal(loc=100.0, scale=5.00, size=adata.shape[0])

    bdata = np.zeros_like(adata)
    bdata[:, 1] = 1
    bdata[:, 0] = np.random.normal(loc=50.0, scale=5.00, size=bdata.shape[0])

    data = np.vstack((adata, bdata))

    ds_context = Context(meta_types=[MetaType.REAL, MetaType.DISCRETE])
    ds_context.parametric_types = [None, Categorical]
    ds_context.add_domains(data)

    # One product node per cluster: a piecewise leaf for the real feature,
    # a categorical leaf for the cluster indicator.
    L = create_piecewise_leaf(
        adata[:, 0].reshape(-1, 1), ds_context, scope=[0], prior_weight=None, hist_source="numpy"
    ) * create_parametric_leaf(adata[:, 1].reshape(-1, 1), ds_context, scope=[1])
    R = create_piecewise_leaf(
        bdata[:, 0].reshape(-1, 1), ds_context, scope=[0], prior_weight=None, hist_source="numpy"
    ) * create_parametric_leaf(bdata[:, 1].reshape(-1, 1), ds_context, scope=[1])
    spn = 0.5 * L + 0.5 * R

    # Conditioning on feature 1 should recover each cluster's mean of feature 0.
    evidence = np.zeros((2, 2))
    evidence[1, 1] = 1
    evidence[:, 0] = np.nan
    expectation = Expectation(spn, set([0]), evidence)

    self.assertAlmostEqual(np.mean(adata[:, 0]), expectation[0, 0], 2)
    self.assertAlmostEqual(np.mean(bdata[:, 0]), expectation[1, 0], 2)
# # spn
# ds_context = Context(meta_types=[MetaType.REAL] * blocked_images[0].shape[1])
# ds_context.add_domains(blocked_images[0])
# ds_context.parametric_type = [Poisson] * blocked_images[0].shape[1]
# # print("data ready", data.shape)
# # the following two options should be working now.
# # spn = learn_structure(upperimage, ds_context, get_split_rows_random_partition(np.random.RandomState(17)),
# #                       get_split_cols_random_partition(np.random.RandomState(17)), create_parametric_leaf)
# spn = learn_parametric(blocked_images[0], ds_context, min_instances_slice=0.1 * len(data), ohe=False)

# spn over the label columns
ds_context = Context(meta_types=[MetaType.DISCRETE] * 10)
ds_context.add_domains(data_labels)
ds_context.parametric_types = [Bernoulli] * data_labels.shape[1]
spn = learn_parametric(data_labels, ds_context, min_instances_slice=0.3 * len(data_labels))

# first cspn: first image block conditioned on the labels
dataIn = data_labels
dataOut = blocked_images[0]

ds_context = Context(meta_types=[MetaType.DISCRETE] * dataOut.shape[1])
ds_context.add_domains(dataOut)
ds_context.parametric_types = [Conditional_Poisson] * dataOut.shape[1]
scope = list(range(dataOut.shape[1]))

print(np.shape(dataIn), np.shape(dataOut))
print(dataIn[0], dataOut[0])

# Arguments after the data matrix follow the other learn_conditional calls in
# this script; the min_instances_slice value is an assumption.
cspn_1st = learn_conditional(
    np.concatenate((dataOut, dataIn), axis=1),
    ds_context,
    scope,
    min_instances_slice=0.3 * len(dataOut),
)
add_histogram_inference_support()
add_parametric_inference_support()

memory = Memory(cachedir="cache", verbose=0, compress=9)

# Synthetic grid data: three discrete features plus a derived class label.
data = []
for x in range(10):
    for y in range(10):
        for z in range(10):
            data.append([x, y, z, int(((x + y + z) / 5))])
data = np.array(data).astype(float)

types = [MetaType.DISCRETE, MetaType.DISCRETE, MetaType.DISCRETE, MetaType.DISCRETE]
ds_context = Context(meta_types=types)
ds_context.parametric_types = [Gaussian, Gaussian, Gaussian, Categorical]
ds_context.add_domains(data)

num_classes = len(np.unique(data[:, 3]))

# spn = learn_mspn(data, ds_context, min_instances_slice=10, leaves=create_leaf, threshold=0.3)

# Learn one MSPN branch per class label and combine them in a mixture
# weighted by the class frequencies.
spn = Sum()
for label, count in zip(*np.unique(data[:, 3], return_counts=True)):
    branch = learn_mspn(
        data[data[:, 3] == label, :], ds_context, min_instances_slice=10, leaves=create_leaf, threshold=0.1
    )
    spn.children.append(branch)
    spn.weights.append(count / data.shape[0])
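# Follow-up sketch, not part of the original snippet: finalize the mixture and
# query it. assign_ids and rebuild_scopes_bottom_up (spn.structure.Base) and
# log_likelihood (spn.algorithms.Inference) are standard SPFlow helpers; doing
# this here, rather than further down in the original script, is an assumption.
from spn.structure.Base import assign_ids, rebuild_scopes_bottom_up
from spn.algorithms.Inference import log_likelihood

assign_ids(spn)
rebuild_scopes_bottom_up(spn)
print("mean train log-likelihood:", np.mean(log_likelihood(spn, data)))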
datasets.append((get_blocks(images, num_blocks=num_blocks, blocks=block_ids.tolist()), 1))

num_mpes = 1
num_samples = 10

cspns = []
mpe_query_blocks = None
sample_query_blocks = None

for i, ((tr_block, block_idx), conditional_blocks) in enumerate(datasets):
    print("learning", i)
    conditional_features_count = (tr_block.shape[1] // len(block_idx)) * conditional_blocks

    if i == 0:
        # Unconditional SPN for the first block.
        ds_context = Context(meta_types=[MetaType.REAL] * tr_block.shape[1])
        ds_context.add_domains(tr_block)
        ds_context.parametric_types = [Gaussian] * tr_block.shape[1]
        cspn = learn_parametric(tr_block, ds_context, min_instances_slice=20, ohe=False, memory=memory)
    else:
        # Conditional SPN: the first conditional_features_count columns are the
        # target scope, the remaining columns are the conditioning evidence.
        cspn = learn_conditional(
            tr_block,
            Context(
                meta_types=[MetaType.REAL] * tr_block.shape[1],
                parametric_types=[Conditional_Gaussian] * tr_block.shape[1],
            ).add_domains(tr_block),
            scope=list(range(conditional_features_count)),
            min_instances_slice=30,
            memory=memory,
        )
    cspns.append(cspn)
    print("done")
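# Follow-up sketch, not part of the original loop: draw a few samples from the
# unconditional SPN learned for the first block, mirroring the sample_instances
# call used elsewhere in these experiments. Assumes sampling support for the
# Gaussian leaves has been registered elsewhere in the script; the all-NaN
# placeholder asks for every feature to be sampled.
from numpy.random import RandomState
from spn.algorithms.Sampling import sample_instances

num_block_features = max(cspns[0].scope) + 1
samples = sample_instances(cspns[0], np.full((num_samples, num_block_features), np.nan), RandomState(123))
print("sampled block shape:", samples.shape)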
def train_spn(window_size=3, min_instances_slice=10000, features=None, number_of_classes=3):
    if features is None:
        features = [20, 120]

    add_parametric_inference_support()
    add_parametric_text_support()

    data = get_data_in_window(window_size=window_size, features=features, three_classes=number_of_classes == 3)

    # Stratify on the label of the centre pixel of each window.
    sss = sk.model_selection.StratifiedShuffleSplit(test_size=0.2, train_size=0.8, random_state=42)
    for train_index, test_index in sss.split(
        data[:, 0 : window_size * window_size * len(features)],
        data[:, (window_size * window_size * len(features)) + (int(window_size * window_size / 2))],
    ):
        X_train, X_test = data[train_index], data[test_index]

    # One REAL/Gaussian column per feature pixel, one DISCRETE/Categorical
    # column per label pixel in the window.
    context_list = list()
    parametric_list = list()
    number_of_features = len(features)
    for _ in range(number_of_features * window_size * window_size):
        context_list.append(MetaType.REAL)
        parametric_list.append(Gaussian)
    for _ in range(window_size * window_size):
        context_list.append(MetaType.DISCRETE)
        parametric_list.append(Categorical)

    ds_context = Context(meta_types=context_list)
    ds_context.add_domains(data)
    ds_context.parametric_types = parametric_list

    spn = load_spn(window_size, features, min_instances_slice, number_of_classes)
    if spn is None:
        # Mixture with one branch per (label pixel, label value) pair,
        # weighted by the number of training instances in that branch.
        spn = Sum()
        for class_pixel in tqdm(range(-window_size * window_size, 0)):
            for label, count in zip(*np.unique(data[:, class_pixel], return_counts=True)):
                train_data = X_train[X_train[:, class_pixel] == label, :]
                branch = learn_parametric(train_data, ds_context, min_instances_slice=min_instances_slice)
                spn.children.append(branch)
                spn.weights.append(train_data.shape[0])
                spn.scope.extend(branch.scope)
        spn.weights = (np.array(spn.weights) / sum(spn.weights)).tolist()
        assign_ids(spn)
        save_spn(spn, window_size, features, min_instances_slice, number_of_classes)

    # Classify the centre pixel of each test window by setting its label column
    # to each candidate class and picking the class with the highest likelihood.
    center_label_column = -(window_size * window_size) + window_size * window_size // 2
    res = np.ndarray((X_test.shape[0], number_of_classes))
    for i in tqdm(range(number_of_classes)):
        tmp = X_test.copy()
        tmp[:, center_label_column] = i
        res[:, i] = log_likelihood(spn, tmp)[:, 0]
    predicted_classes = np.argmax(res, axis=1).reshape((X_test.shape[0], 1))

    correct_predicted = 0
    for x, y in zip(X_test[:, center_label_column], predicted_classes):
        if x == y[0]:
            correct_predicted += 1
    accuracy = correct_predicted / X_test.shape[0]
    return spn, accuracy
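# Usage sketch, not part of the original module: train the window SPN with its
# default settings and report the centre-pixel classification accuracy on the
# held-out split.
if __name__ == '__main__':
    spn, accuracy = train_spn(window_size=3, min_instances_slice=10000, number_of_classes=3)
    print("test accuracy:", accuracy)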
np.random.seed(42)
dataIn = np.random.randint(low=0, high=3, size=600).reshape(-1, 2)
dataOut = np.random.randint(low=0, high=3, size=1200).reshape(-1, 4)
data = np.concatenate((dataOut, dataIn), axis=1)
assert data.shape[1] == dataIn.shape[1] + dataOut.shape[1], "invalid column size"
assert data.shape[0] == dataIn.shape[0] == dataOut.shape[0], "invalid row size"

ds_context = Context(
    meta_types=[MetaType.DISCRETE, MetaType.DISCRETE, MetaType.DISCRETE, MetaType.DISCRETE]
)
ds_context.add_domains(dataOut)
ds_context.parametric_types = [
    Conditional_Poisson,
    Conditional_Poisson,
    Conditional_Poisson,
    Conditional_Poisson,
]
scope = list(range(dataOut.shape[1]))

# Mixture over the distinct values in column 2: each branch is a conditional
# SPN trained with learn_conditional and weighted by the value's frequency.
spn = Sum()
for label, count in zip(*np.unique(data[:, 2], return_counts=True)):
    branch = learn_conditional(data, ds_context, scope, min_instances_slice=10000)
    spn.children.append(branch)
    spn.weights.append(count / data.shape[0])
    spn.scope.extend(branch.scope)
# # print(sample_instances(spn, np.array([[np.nan] * 50] * 3).reshape(-1, 50), RandomState(123)))
# # 0/0

# cspn: bottom half of the image conditioned on the top half
dataIn = upperimage
dataOut = bottomimage

np.random.seed(42)

# assert data.shape[1] == dataIn.shape[1] + dataOut.shape[1], 'invalid column size'
# assert data.shape[0] == dataIn.shape[0] == dataOut.shape[0], 'invalid row size'

ds_context = Context(meta_types=[MetaType.DISCRETE] * dataOut.shape[1])
ds_context.add_domains(dataOut)
ds_context.parametric_types = [Conditional_Poisson] * dataOut.shape[1]
scope = list(range(dataOut.shape[1]))

cspn = learn_conditional(
    np.concatenate((dataOut, dataIn), axis=1), ds_context, scope, min_instances_slice=60000000
)
# spn.scope.extend(branch.scope)

print(cspn)
plot_spn(cspn, "basicspn.png")

with open(cspn_file, "wb") as fileObject:
    pickle.dump(cspn, fileObject)
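# Follow-up sketch, not part of the original script: reload the pickled CSPN to
# confirm the round trip; cspn_file is the path variable already used above.
with open(cspn_file, "rb") as fileObject:
    reloaded_cspn = pickle.load(fileObject)
print(reloaded_cspn)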
from spn.structure.leaves.parametric.Parametric import Poisson, Categorical
from spn.structure.leaves.parametric.Text import add_parametric_text_support

if __name__ == '__main__':
    add_parametric_inference_support()
    add_parametric_text_support()

    np.random.seed(42)
    data = np.random.randint(low=0, high=3, size=600).reshape(-1, 3)
    # print(data)

    ds_context = Context(meta_types=[MetaType.DISCRETE, MetaType.DISCRETE, MetaType.DISCRETE])
    ds_context.add_domains(data)
    ds_context.parametric_types = [Poisson, Poisson, Categorical]

    # Mixture with one branch per value of column 2, each branch trained only on
    # the rows with that value and weighted by its frequency.
    spn = Sum()
    for label, count in zip(*np.unique(data[:, 2], return_counts=True)):
        branch = learn_parametric(data[data[:, 2] == label, :], ds_context, min_instances_slice=10000)
        spn.children.append(branch)
        spn.weights.append(count / data.shape[0])
        spn.scope.extend(branch.scope)

    print(spn)
    print(spn_to_str_equation(spn))
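    # Follow-up sketch, not part of the original script: finalize the mixture and
    # sanity-check it with SPFlow's validity helper. assign_ids and
    # rebuild_scopes_bottom_up are from spn.structure.Base, is_valid from
    # spn.algorithms.Validity; running them here is an assumption.
    from spn.structure.Base import assign_ids, rebuild_scopes_bottom_up
    from spn.algorithms.Validity import is_valid

    assign_ids(spn)
    rebuild_scopes_bottom_up(spn)
    print("is_valid:", is_valid(spn))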