def build_spn(features):
    """Learn a classification SPN from ``features``.

    One parametric leaf type is supplied per column, in this order:
    Gaussian, Categorical, Categorical, Gaussian. Column index 2 is passed
    to ``learn_classifier`` as the label column.

    Args:
        features: Training data; used both to derive the context domains
            and as the data the classifier is learned from.

    Returns:
        The SPN returned by ``learn_classifier``.
    """
    leaf_types = [Gaussian, Categorical, Categorical, Gaussian]
    ds_context = Context(parametric_types=leaf_types).add_domains(features)
    return learn_classifier(features, ds_context, learn_parametric, 2)
def classification():
    """Demo: learn an SPN classifier on two synthetic blobs and print MPE output.

    Steps performed:
      1. Seed NumPy's global RNG with 123 and draw two 500-point 2-D normal
         clusters (means 5 and 10, unit scale), labelled 0 and 1 in column 2.
      2. Plot the clusters and their nominal centers and save the figure to
         ``classification_training_data.png``.
      3. Learn an SPN with Gaussian/Gaussian/Categorical leaves, using
         column 2 as the label.
      4. Print a two-row query whose label column is NaN, then print the
         result of running MPE on it.
    """
    import numpy as np

    np.random.seed(123)
    # Draw order matters for reproducibility: the mean-5 cluster first.
    blob_low = np.random.normal(5, 1, (500, 2))
    blob_high = np.random.normal(10, 1, (500, 2))
    coords = np.r_[blob_low, blob_high]
    labels = np.r_[np.zeros((500, 1)), np.ones((500, 1))]
    train_data = np.c_[coords, labels]

    centers = [[5, 5], [10, 10]]

    import matplotlib.pyplot as plt

    colors = ['#bda36b', '#7aaab4']

    plt.figure()
    for label, colour in enumerate(colors):
        members = train_data[:, 2] == label
        plt.plot(
            train_data[members, 0],
            train_data[members, 1],
            'w',
            markerfacecolor=colour,
            marker='.',
        )
        center_x, center_y = centers[label]
        plt.plot(
            center_x,
            center_y,
            'o',
            markerfacecolor=colour,
            markeredgecolor='k',
            markersize=6,
        )

    plt.title('Training Data')
    plt.grid(True)
    plt.savefig("classification_training_data.png", bbox_inches='tight', pad_inches=0)

    from spn.algorithms.LearningWrappers import learn_parametric, learn_classifier
    from spn.structure.leaves.parametric.Parametric import Categorical, Gaussian
    from spn.structure.Base import Context

    ds_context = Context(parametric_types=[Gaussian, Gaussian, Categorical])
    ds_context = ds_context.add_domains(train_data)
    spn_classification = learn_classifier(train_data, ds_context, learn_parametric, 2)

    # Two query rows; NaN in the label column marks the value to be inferred.
    query_values = [3.0, 4.0, np.nan, 12.0, 18.0, np.nan]
    test_classification = np.array(query_values).reshape(-1, 3)
    print(test_classification)

    from spn.algorithms.MPE import mpe

    print(mpe(spn_classification, test_classification))
def learn_spn(data, min_inst):
    """Learn a classification SPN with the label in column 0.

    The context declares one Categorical column followed by ``28 * 28``
    Gaussian columns. Columns are split with a random-partition splitter
    seeded with 17; rows are split with ``"kmeans"``.

    Args:
        data: Training data; also used to derive the context domains.
        min_inst: Forwarded as ``min_instances_slice`` to ``learn_classifier``.

    Returns:
        The SPN returned by ``learn_classifier``.
    """
    leaf_types = [Categorical] + [Gaussian] * (28 * 28)
    ds_context = Context(parametric_types=leaf_types).add_domains(data)
    # Fixed seed so that the random column partitioning is repeatable.
    column_splitter = get_split_cols_random_partition(np.random.RandomState(17))
    return learn_classifier(
        data,
        ds_context,
        learn_parametric,
        0,
        cols=column_splitter,
        rows="kmeans",
        min_instances_slice=min_inst,
    )
def fit(self, X, y):
    """Fit the classifier: learn an SPN on ``X`` joined with ``y``.

    ``X`` and ``y`` are validated with ``check_X_y`` and concatenated
    column-wise into a float32 training matrix, so the label column index
    equals the number of feature columns. If ``self.parametric_types`` is
    ``None``, every feature is modelled as Gaussian and the label as
    Categorical. Optional hooks and TensorFlow weight optimisation run
    after structure learning, depending on the instance's settings.

    Args:
        X: Feature matrix.
        y: Labels for the rows of ``X``.

    Returns:
        ``self``, with ``_spn``, ``X_`` and ``y_`` set (and ``loss`` when
        weight optimisation is enabled).
    """
    # Validate shapes before anything else.
    X, y = check_X_y(X, y, multi_output=True)
    n_features = X.shape[1]

    # Labels go into the trailing column(s) of the training matrix.
    train_data = np.c_[X, y].astype(np.float32)

    # Default leaf layout: Gaussian features followed by a Categorical label.
    parametric_types = self.parametric_types
    if parametric_types is None:
        parametric_types = [Gaussian] * n_features + [Categorical]

    ds_context = Context(parametric_types=parametric_types).add_domains(train_data)
    self._spn = learn_classifier(
        train_data,
        ds_context=ds_context,
        spn_learn_wrapper=learn_parametric,
        label_idx=n_features,
        cpus=self.n_jobs,
    )

    # The pre-optimisation hook, if set, receives the SPN and its return
    # value replaces it.
    if self.tf_pre_optimization_hook:
        self._spn = self.tf_pre_optimization_hook(self._spn)

    # Optional weight optimisation in TensorFlow; also records the loss.
    if self.tf_optimize_weights:
        self._spn, self.loss = optimize_tf(
            spn=self._spn,
            data=train_data,
            optimizer=self.tf_optimizer,
            batch_size=self.tf_batch_size,
            epochs=self.tf_n_epochs,
            return_loss=True,
        )

    # The post-optimisation hook works the same way as the pre hook.
    if self.tf_post_optimization_hook:
        self._spn = self.tf_post_optimization_hook(self._spn)

    self.X_ = X
    self.y_ = y

    return self
values=y_train, axis=1) test_data_with_labels = np.insert(X_test, obj=X_test.shape[1], values=y_test, axis=1) # Learn SPN parametric_types = [ Gaussian, Gaussian, Gaussian, Gaussian, Gaussian, Gaussian, Gaussian, Gaussian, Gaussian ] target_position = 8 context = Context( parametric_types=parametric_types).add_domains(train_data_with_labels) spn = learn_classifier(train_data_with_labels, context, learn_parametric, target_position) # Plot SPN # plot_spn(spn, 'images/california_housing_spn.png') # Predict true_values = np.array(test_data_with_labels[:, -1]) items_to_predict = test_data_with_labels items_to_predict[:, target_position] = np.nan predicted_values = mpe(spn, test_data_with_labels) predicted_labels = predicted_values[:, target_position] error = mean_squared_error(true_values, predicted_labels) print(f'MSE test: {error}')
# Read the train/test CSVs into pandas data frames and convert to arrays.
dftr = pd.read_csv(train_path, delimiter=',')
train_data = np.array(dftr)
dfte = pd.read_csv(test_path, delimiter=',')
test_data = np.array(dfte)

# Drop the first column of the training file. The remaining matrix has the
# binary class in column 0, followed by the feature columns.
XX = train_data[:, 1:]
# `np.float` was removed in NumPy 1.24; it was an alias for the builtin
# float, i.e. float64.
X = np.array(XX, dtype=np.float64)

# TODO: the first column as integer since is the binary class
# One Categorical leaf for the class, one Gaussian leaf per feature column.
t = [Categorical] + [Gaussian] * (X.shape[1] - 1)

# Learning on train data (label index 0).
spn_classification = learn_classifier(
    X, Context(parametric_types=t).add_domains(X), learn_parametric, 0)

# Test file: the first column is kept separately in R, the rest are features.
TT = test_data[:, 1:]
R = test_data[:, [0]]
T = np.array(TT, dtype=np.float64)

# The unknown label is marked with NaN. The column must sit at index 0 so
# that the test matrix has the same layout the SPN was trained on
# (label first, then features); its length follows the actual row count.
nan = np.full((T.shape[0], 1), np.nan)
T = np.append(nan, T, axis=1)
test_classification = T

# Predicting on test data: MPE fills in the NaN label column.
from spn.algorithms.MPE import mpe

print(mpe(spn_classification, test_classification))
plt.show()

# ---- Model Learning ----

# Training parameters.
# threshold (alpha): the smaller it is, the more product nodes are added.
threshold = 0.5
# min_instances_slice: a smaller value leads to a deeper SPN.
min_instances_slice = 250
# 784 Gaussian feature columns followed by one Categorical column.
parametric_types = [Gaussian] * 784 + [Categorical]

context = Context(parametric_types=parametric_types)
context = context.add_domains(train_data)

# Model training, timed with wall-clock time.
print('\033[1mStart SPN training...\033[0m')
start_time = time.time()
spn = learn_classifier(
    data=train_data,
    ds_context=context,
    spn_learn_wrapper=learn_parametric,
    label_idx=label_idx,
    min_instances_slice=min_instances_slice,
    threshold=threshold,
)
duration = time.time() - start_time
print('\033[1mFinished training after %.3f sec.\033[0m' % duration)

# Save model
output_path = "/home/ml-mrothermel/projects/Interpreting-SPNs/output/spns"
file_name = "mnist_spn_99.pckl"
target_file = output_path + "/" + file_name
save_object_to(spn, target_file)
if __name__ == '__main__':
    # Load the CSV (first row skipped as header) and keep columns 0, 1 and 3.
    raw_table = genfromtxt('20180511-for-SPN.csv', delimiter=',', skip_header=True)
    data = raw_table[:, [0, 1, 3]]
    print(data)

    # Two real-valued columns and one discrete column. Only meta types are
    # given; no parametric types are set on the context.
    column_meta_types = [MetaType.REAL, MetaType.REAL, MetaType.DISCRETE]
    ds_context = Context(meta_types=column_meta_types)
    ds_context.add_domains(data)

    def create_leaf(data, ds_context, scope):
        """Build a piecewise leaf without isotonic fitting or prior weight."""
        return create_piecewise_leaf(
            data,
            ds_context,
            scope,
            isotonic=False,
            prior_weight=None,
        )

    def learn_wrapper(data, ds_context):
        """Learn an MSPN using ``create_leaf``; ``memory`` comes from the enclosing module."""
        return learn_mspn(
            data,
            ds_context,
            min_instances_slice=100,
            leaves=create_leaf,
            memory=memory,
        )

    # Column 2 of the selected data is the label.
    spn = learn_classifier(data, ds_context, learn_wrapper, 2)
    print("learned")

    plot_density(spn, data)
#
# Here, we model our problem as containing 3 features: two Gaussians for the
# coordinates and one Categorical for the label. We specify that the label is
# in column 2, and create the corresponding SPN.

train_data = np.c_[
    np.r_[np.random.normal(5, 1, (500, 2)), np.random.normal(10, 1, (500, 2))],
    np.r_[np.zeros((500, 1)), np.ones((500, 1))],
]

# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them
# positionally, and older versions accept the keyword form as well.
sns.scatterplot(x=train_data[:, 0], y=train_data[:, 1], hue=train_data[:, 2])

# %%
# We can learn an SPN from the training data:

spn_classification = learn_classifier(
    train_data,
    Context(parametric_types=[Gaussian, Gaussian, Categorical]).add_domains(train_data),
    learn_parametric,
    2,
)

from spn.io.Graphics import draw_spn

draw_spn(spn_classification)

# %%
# Now, imagine we want to classify two instances, one located at :math:`(3,4)`
# and another one at :math:`(12,18)`. To do that, we first create an array with
# two rows and 3 columns. We set the last column to ``np.nan`` to indicate
# that we don't know the labels. And we set the rest of the values in the 2D
# array accordingly.

test_data = np.array([3.0, 4.0, np.nan, 12.0, 18.0, np.nan]).reshape(-1, 3)
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Prepare data: 60/40 train/test split of iris with a fixed seed.
iris = datasets.load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.4, random_state=42)

# The label is inserted as the column right after the features, so its
# index equals the number of feature columns (4 for iris).
label_idx = X_train.shape[1]
train_data_with_labels = np.insert(X_train, obj=label_idx, values=y_train, axis=1)
test_data_with_labels = np.insert(X_test, obj=X_test.shape[1], values=y_test, axis=1)

# Learn SPN: Gaussian leaves for the features, Categorical for the label.
context = Context(
    parametric_types=[Gaussian, Gaussian, Gaussian, Gaussian, Categorical]
).add_domains(train_data_with_labels)
spn_classification = learn_classifier(
    train_data_with_labels, context, learn_parametric, label_idx)

# Plot SPN
plot_spn(spn_classification, 'images/iris_spn.png')

# Predict
true_values = np.array(test_data_with_labels[:, -1])
# Work on a copy: masking the label column must not destroy the labels held
# in test_data_with_labels (the original aliased the array and clobbered it).
items_to_predict = test_data_with_labels.copy()
items_to_predict[:, label_idx] = np.nan
predicted_values = mpe(spn_classification, items_to_predict)
predicted_labels = predicted_values[:, label_idx]

acc = accuracy_score(true_values, predicted_labels)
print(acc)