spn = learn_parametric(data_labels, ds_context, min_instances_slice=0.3 * len(data_labels)) # first cspn dataIn = data_labels dataOut = blocked_images[0] ds_context = Context(meta_types=[MetaType.DISCRETE] * dataOut.shape[1]) ds_context.add_domains(dataOut) ds_context.parametric_types = [Conditional_Poisson] * dataOut.shape[1] scope = list(range(dataOut.shape[1])) print(np.shape(dataIn), np.shape(dataOut)) print(dataIn[0], dataOut[0]) cspn_1st = learn_conditional(np.concatenate((dataOut, dataIn), axis=1), ds_context, scope, min_instances_slice=0.3 * len(data)) fileObject = open(path + "/cspn_1st", "wb") pickle.dump(cspn_1st, fileObject) fileObject.close() # a list of cspns cspn_army = [] for i in range(3): print("cspn%s" % i) if i == 0: dataIn = blocked_images[i] else: dataIn = blocked_images[0] for j in range(1, i + 1):
# spn ds_context = Context(meta_types=[MetaType.REAL] * tr_block.shape[1]) ds_context.add_domains(tr_block) ds_context.parametric_types = [Gaussian] * tr_block.shape[1] cspn = learn_parametric(tr_block, ds_context, min_instances_slice=2 * len(tr_block), ohe=False) else: cspn = learn_conditional( tr_block, Context(meta_types=[MetaType.REAL] * tr_block.shape[1], parametric_types=[Conditional_Gaussian] * tr_block.shape[1]).add_domains(tr_block), scope=list(range(datasets[0][0].shape[1])), min_instances_slice=0.5 * tr_block.shape[0], memory=memory) cspns.append(cspn) if mpe_query_blocks is None: # first time, we only care about the structure to put nans mpe_query_blocks = np.zeros_like(tr_block[0:10, :].reshape(10, -1)) sample_query_blocks = np.zeros_like(tr_block[0:10, :].reshape( 10, -1)) else: # i+1 time: we set the previous mpe values as evidence mpe_query_blocks = np.zeros_like( np.array(tr_block[0:10, :].reshape(10, -1))) mpe_query_blocks[:, -(mpe_result.shape[1]):] = mpe_result
# NOTE(review): whitespace-mangled fragment, reformatted; truncated at the final
# `else:` — the evidence-setting code for i > 0 is outside this view.
# Trains one model per dataset block: an unconditional Gaussian SPN for the first
# block, then CSPNs conditioned on the previously generated blocks.
for i, ((tr_block, block_idx), conditional_blocks) in enumerate(datasets):
    print("learning", i)
    # number of output (non-evidence) columns: per-block width times block count
    # — assumes tr_block.shape[1] is divisible evenly across len(block_idx) blocks
    conditional_features_count = (tr_block.shape[1] // len(block_idx)) * conditional_blocks
    if i == 0:
        # spn: no conditioning variables for the first block
        ds_context = Context(meta_types=[MetaType.REAL] * tr_block.shape[1])
        ds_context.add_domains(tr_block)
        ds_context.parametric_types = [Gaussian] * tr_block.shape[1]
        cspn = learn_parametric(tr_block, ds_context, min_instances_slice=20, ohe=False, memory=memory)
    else:
        # CSPN: the first conditional_features_count columns are the output scope,
        # the remainder are the conditioning evidence
        cspn = learn_conditional(
            tr_block,
            Context(
                meta_types=[MetaType.REAL] * tr_block.shape[1],
                parametric_types=[Conditional_Gaussian] * tr_block.shape[1],
            ).add_domains(tr_block),
            scope=list(range(conditional_features_count)),
            min_instances_slice=30,
            memory=memory,
        )
    cspns.append(cspn)
    print("done")
    # for i, ((tr_block, block_idx), conditional_blocks) in enumerate(datasets):
    #     cspn = cspns[i]
    if i == 0:
        # first time, we only care about the structure to put nans
        mpe_query_blocks = np.zeros_like(tr_block[0:num_mpes, :].reshape(num_mpes, -1))
        sample_query_blocks = np.zeros_like(tr_block[0:num_samples, :].reshape(num_samples, -1))
    else:
        # i+1 time: we set the previous mpe values as evidence
get_blocks(images, num_blocks=(2, 2), blocks=[0]), # block of 1|0 get_blocks(images, num_blocks=(2, 2), blocks=[1, 0]), # block of 2|1,0 get_blocks(images, num_blocks=(2, 2), blocks=[2, 1, 0]), # block of 3|2,1,0 get_blocks(images, num_blocks=(2, 2), blocks=[3, 2, 1, 0]) ] cspns = [] mpe_query_blocks = None sample_query_blocks = None for i, (tr_block, block_idx) in enumerate(datasets): cspn = learn_conditional( np.concatenate((tr_block, data_labels_tr), axis=1), Context(meta_types=[MetaType.REAL] * tr_block.shape[1], parametric_types=[Conditional_Gaussian] * tr_block.shape[1]).add_domains(tr_block), scope=list(range(datasets[0][0].shape[1])), rows='tsne', min_instances_slice=1000, memory=memory) cspns.append(cspn) continue if mpe_query_blocks is None: # first time, we only care about the structure to put nans mpe_query_blocks = np.zeros_like(tr_block[0:10, :].reshape(10, -1)) sample_query_blocks = mpe_query_blocks else: # i+1 time: we set the previous mpe values as evidence mpe_query_blocks = np.zeros_like(np.array(tr_block[0:10, :].reshape(10, -1))) mpe_query_blocks[:, -(mpe_result.shape[1] - 10):] = mpe_result[:, 0:-10]
# In left, OUT right file_cache_path = "/tmp/cspn.bin" if not os.path.isfile(file_cache_path): spn_training_data = left.reshape(px, -1) spn_training_data = np.repeat(spn_training_data, 10, axis=0) ds_context = Context(parametric_types=[Bernoulli] * left.shape[1]).add_domains(spn_training_data) spn = learn_parametric(spn_training_data, ds_context, min_instances_slice=1) ds_context = Context(parametric_types=[Conditional_Bernoulli] * right.shape[1]).add_domains(right) scope = list(range(right.shape[1])) cspn = learn_conditional(conditional_training_data, ds_context, scope, min_instances_slice=60000000) with open(file_cache_path, 'wb') as f: pickle.dump((cspn, spn), f, pickle.HIGHEST_PROTOCOL) with open(file_cache_path, 'rb') as f: cspn, spn = pickle.load(f) def conditional_input_to_LR(input_images_in_rl): # format L|R images_to_lr = np.concatenate( (input_images_in_rl[:, input_images_in_rl.shape[1] // 2:].reshape(input_images_in_rl.shape[0], px, -1), input_images_in_rl[:, :input_images_in_rl.shape[1] // 2].reshape(input_images_in_rl.shape[0], px, -1)), axis=2).reshape( input_images_in_rl.shape[0], -1) return images_to_lr
# NOTE(review): whitespace-mangled fragment, reformatted; it opens mid-call —
# presumably `ds_context = Context(meta_types=[` on an earlier, unseen line.
# Builds a Sum (mixture) node with one CSPN branch per distinct value of
# data[:, 2], weighted by that value's empirical frequency.
    MetaType.DISCRETE, MetaType.DISCRETE, MetaType.DISCRETE, MetaType.DISCRETE
])
ds_context.add_domains(dataOut)
ds_context.parametric_types = [
    Conditional_Poisson, Conditional_Poisson, Conditional_Poisson, Conditional_Poisson
]
scope = list(range(dataOut.shape[1]))
spn = Sum()
for label, count in zip(*np.unique(data[:, 2], return_counts=True)):
    # NOTE(review): `label` is never used — every branch is trained on the FULL
    # data, not the per-label subset; likely a bug, confirm against the original
    branch = learn_conditional(data, ds_context, scope, min_instances_slice=10000)
    spn.children.append(branch)
    spn.weights.append(count / data.shape[0])
    spn.scope.extend(branch.scope)
assign_ids(spn)
print(spn)
# MPE sanity check: hide column 0 of three rows and let MPE fill it in
mpe_test = data[[0, 1, 2], :].astype(float)
mpe_test[:, 0] = np.nan
from spn.algorithms.MPE import mpe
add_conditional_mpe_support()
# cspn dataIn = upperimage dataOut = bottomimage np.random.seed(42) # assert data.shape[1] == dataIn.shape[1] + dataOut.shape[1], 'invalid column size' # assert data.shape[0] == dataIn.shape[0] == dataOut.shape[0], 'invalid row size' ds_context = Context(meta_types=[MetaType.DISCRETE] * dataOut.shape[1]) ds_context.add_domains(dataOut) ds_context.parametric_types = [Conditional_Poisson] * dataOut.shape[1] scope = list(range(dataOut.shape[1])) cspn = learn_conditional(np.concatenate((dataOut, dataIn), axis=1), ds_context, scope, min_instances_slice=60000000) # spn.scope.extend(branch.scope) print(cspn) plot_spn(cspn, "basicspn.png") fileObject = open(cspn_file, "wb") pickle.dump(cspn, fileObject) fileObject.close() from numpy.random.mtrand import RandomState from spn.algorithms.Sampling import sample_instances from spn.structure.leaves.conditional.Sampling import add_conditional_sampling_support
# NOTE(review): whitespace-mangled fragment, reformatted.
# Multi-label classification on the 'yeast' dataset: learn a CSPN
# P(labels | features), predict test labels via MPE, and report standard
# multi-label metrics.
if __name__ == '__main__':
    train_input, train_labels, test_input, test_labels = get_categorical_data('yeast')
    print(train_input.shape)
    print(train_labels.shape)
    print(test_input.shape)
    print(test_labels.shape)
    num_labels = train_labels.shape[1]
    ds_context = Context(parametric_types=[Conditional_Bernoulli] * num_labels)
    ds_context.add_domains(train_labels)
    # learn_conditional expects [output | input]: labels first, then features
    train_data = np.concatenate((train_labels, train_input), axis=1)
    cspn = learn_conditional(train_data, ds_context, scope=list(range(num_labels)), rows='tsne',
                             min_instances_slice=500, threshold=0.5, memory=memory)
    # query: label columns set to NaN so MPE infers them from the features
    test_data = np.zeros_like(test_labels, dtype=np.float32)
    test_data[:] = np.nan
    test_data = np.concatenate((test_data, test_input), axis=1)
    pred_test_labels = mpe(cspn, test_data)[:, 0:num_labels]
    # compare with
    # https://papers.nips.cc/paper/1964-a-kernel-method-for-multi-labelled-classification.pdf
    # threshold the continuous MPE output to {0, 1}; clamp any negatives to 0
    binary_pred_labels = np.round(pred_test_labels).astype(int)
    binary_pred_labels[binary_pred_labels < 0] = 0
    print("hamming_loss", hamming_loss(test_labels, binary_pred_labels))
    print("zero_one_loss", zero_one_loss(test_labels, binary_pred_labels))
    print("precision_score", precision_score(test_labels, binary_pred_labels, average='micro'))