示例#1
0
def build_spn(features):
    """Learn a classification SPN from ``features``.

    The columns are typed, in order, as Gaussian, Categorical, Categorical
    and Gaussian, and column 2 is passed to ``learn_classifier`` as the label
    index.

    Args:
        features: Training data, given both to ``Context.add_domains`` and to
            ``learn_classifier``.

    Returns:
        The value returned by ``learn_classifier`` for this data and context.
    """
    leaf_types = [Gaussian, Categorical, Categorical, Gaussian]
    context = Context(parametric_types=leaf_types).add_domains(features)
    return learn_classifier(features, context, learn_parametric, 2)
示例#2
0
def classification():
    """Run the two-cluster SPN classification demo end to end.

    With NumPy's global RNG seeded to 123, draws 500 points around (5, 5)
    labelled 0 and 500 points around (10, 10) labelled 1, saves a plot of
    them to ``classification_training_data.png``, learns an SPN classifier
    with the label in column 2, and prints two unlabelled query rows followed
    by the result of running MPE on them.
    """
    import numpy as np

    np.random.seed(123)
    # The draw order is part of the reproducible output: the cluster around 5
    # is sampled before the cluster around 10.
    cluster_low = np.random.normal(5, 1, (500, 2))
    cluster_high = np.random.normal(10, 1, (500, 2))
    points = np.r_[cluster_low, cluster_high]
    labels = np.r_[np.zeros((500, 1)), np.ones((500, 1))]
    train_data = np.c_[points, labels]

    centers = [[5, 5], [10, 10]]

    import matplotlib.pyplot as plt

    plt.figure()
    for label, color in enumerate(['#bda36b', '#7aaab4']):
        in_cluster = train_data[:, 2] == label
        xs = train_data[in_cluster, 0]
        ys = train_data[in_cluster, 1]
        plt.plot(xs, ys, 'w', markerfacecolor=color, marker='.')
        center_x, center_y = centers[label]
        plt.plot(center_x, center_y, 'o', markerfacecolor=color, markeredgecolor='k', markersize=6)
    plt.title('Training Data')
    plt.grid(True)
    plt.savefig("classification_training_data.png", bbox_inches='tight', pad_inches=0)

    from spn.algorithms.LearningWrappers import learn_parametric, learn_classifier
    from spn.structure.leaves.parametric.Parametric import Categorical, Gaussian
    from spn.structure.Base import Context

    # Two Gaussian coordinates followed by the Categorical label (column 2).
    context = Context(parametric_types=[Gaussian, Gaussian, Categorical]).add_domains(train_data)
    spn_classification = learn_classifier(train_data, context, learn_parametric, 2)

    # NaN in the label column marks the value MPE is asked to fill in.
    test_classification = np.array([[3.0, 4.0, np.nan],
                                    [12.0, 18.0, np.nan]])
    print(test_classification)

    from spn.algorithms.MPE import mpe

    print(mpe(spn_classification, test_classification))
示例#3
0
def learn_spn(data, min_inst):
    """Learn a classification SPN with the label in column 0.

    Column 0 is typed Categorical and is followed by 28 * 28 Gaussian
    columns. Columns are split with a random-partition splitter seeded with
    17 and rows are split with ``"kmeans"``.

    Args:
        data: Training data, given both to ``Context.add_domains`` and to
            ``learn_classifier``.
        min_inst: Forwarded to ``learn_classifier`` as
            ``min_instances_slice``.

    Returns:
        The value returned by ``learn_classifier``.
    """
    leaf_types = [Categorical] + [Gaussian] * (28 * 28)
    context = Context(parametric_types=leaf_types).add_domains(data)
    column_splitter = get_split_cols_random_partition(
        np.random.RandomState(17))
    return learn_classifier(
        data,
        context,
        learn_parametric,
        0,
        cols=column_splitter,
        rows="kmeans",
        min_instances_slice=min_inst,
    )
示例#4
0
    def fit(self, X, y):
        """Learn the SPN classifier from features ``X`` and targets ``y``.

        ``X`` and ``y`` are validated with ``check_X_y``, merged column-wise
        (targets after the features) into one float32 array, and handed to
        ``learn_classifier`` with the label index set to the number of
        feature columns. The optional pre-optimization hook, TensorFlow
        weight optimization and post-optimization hook are then applied in
        that order, each only when its attribute is truthy.

        Sets ``self._spn``, ``self.X_`` and ``self.y_``; ``self.loss`` is set
        only when ``self.tf_optimize_weights`` is truthy.

        Returns:
            ``self``.
        """
        # Validate the shapes of X and y
        X, y = check_X_y(X, y, multi_output=True)
        n_features = X.shape[1]

        # Put the targets after the feature columns
        merged = np.c_[X, y].astype(np.float32)

        # Without user-supplied types, assume that all feature leaves are
        # Gaussian and the label is Categorical
        leaf_types = (
            [Gaussian] * n_features + [Categorical]
            if self.parametric_types is None
            else self.parametric_types
        )

        # Learn the classifier; the label sits right after the features
        context = Context(parametric_types=leaf_types).add_domains(merged)
        self._spn = learn_classifier(
            merged,
            ds_context=context,
            spn_learn_wrapper=learn_parametric,
            label_idx=n_features,
            cpus=self.n_jobs,
        )

        # Run the pre-optimization hook, if one has been defined
        pre_hook = self.tf_pre_optimization_hook
        if pre_hook:
            self._spn = pre_hook(self._spn)

        # Optimize the weights in tf when the flag is set
        if self.tf_optimize_weights:
            optimized_spn, loss = optimize_tf(
                spn=self._spn,
                data=merged,
                optimizer=self.tf_optimizer,
                batch_size=self.tf_batch_size,
                epochs=self.tf_n_epochs,
                return_loss=True,
            )
            self._spn, self.loss = optimized_spn, loss

        # Run the post-optimization hook, if one has been defined
        post_hook = self.tf_post_optimization_hook
        if post_hook:
            self._spn = post_hook(self._spn)

        self.X_, self.y_ = X, y

        # Return the fitted classifier
        return self
示例#5
0
                                   values=y_train,
                                   axis=1)
# Attach the test targets as an extra column after the last feature column
test_data_with_labels = np.insert(
    X_test, obj=X_test.shape[1], values=y_test, axis=1)

# Learn SPN: all nine columns are Gaussian; column 8 is the prediction target
parametric_types = [Gaussian] * 9
target_position = 8
context = Context(parametric_types=parametric_types).add_domains(
    train_data_with_labels)
spn = learn_classifier(
    train_data_with_labels, context, learn_parametric, target_position)

# Plot SPN
# plot_spn(spn, 'images/california_housing_spn.png')

# Predict
# Copy the true targets first: the next step overwrites that column in place.
true_values = test_data_with_labels[:, -1].copy()
# items_to_predict is the same array object as test_data_with_labels, so
# blanking the target column here also blanks it in test_data_with_labels.
items_to_predict = test_data_with_labels
items_to_predict[:, target_position] = np.nan
predicted_values = mpe(spn, items_to_predict)
predicted_labels = predicted_values[:, target_position]

error = mean_squared_error(true_values, predicted_labels)
print(f'MSE test: {error}')
示例#6
0
# Read the train and test CSV files into pandas data frames and convert them
# to NumPy arrays
dftr = pd.read_csv(train_path, delimiter=',')
train_data = np.array(dftr)
dfte = pd.read_csv(test_path, delimiter=',')
test_data = np.array(dfte)

# Drop column 0 of the training file and convert the rest to floats.
# ``np.float`` was removed in NumPy 1.24; the builtin ``float`` is the same
# dtype (float64).
XX = train_data[:, 1:]
X = np.array(XX, dtype=float)
# TODO: the first column as integer since is the binary class

# Column 0 of X is the class label; every remaining column is modelled as a
# Gaussian.
label_idx = 0
t = [Categorical] + [Gaussian] * (X.shape[1] - 1)

# Learning on train data
spn_classification = learn_classifier(
    X,
    Context(parametric_types=t).add_domains(X), learn_parametric, label_idx)

# Column 0 of the test file is kept aside in R; the remaining columns are
# used as the feature values.
TT = test_data[:, 1:]
R = test_data[:, [0]]

T = np.array(TT, dtype=float)
# MPE fills in NaN entries, so the unknown label must sit at the same column
# index it had during training (label_idx), not after the last feature.
# Inserting a scalar NaN creates one NaN per row, whatever the row count.
T = np.insert(T, label_idx, np.nan, axis=1)
test_classification = T

# Predicting on test data
from spn.algorithms.MPE import mpe

print(mpe(spn_classification, test_classification))
示例#7
0
    plt.show()

    # ---- Model Learning ----

    # Training parameters
    # 784 Gaussian columns followed by one Categorical column
    # (presumably 28x28 pixel values plus the class label; confirm against
    # the layout of train_data and the value of label_idx).
    parametric_types = [Gaussian] * 784 + [Categorical]
    min_instances_slice = 250  # smaller value leads to deeper SPN
    threshold = 0.5  # alpha: the smaller alpha the more product nodes are added

    # Build the context from the leaf types and the training data
    context = Context(
        parametric_types=parametric_types).add_domains(train_data)

    # Model training
    # The \033[1m ... \033[0m ANSI escape codes print the message in bold on
    # terminals that support them.
    print('\033[1mStart SPN training...\033[0m')
    start_time = time.time()

    spn = learn_classifier(data=train_data,
                           ds_context=context,
                           spn_learn_wrapper=learn_parametric,
                           label_idx=label_idx,
                           min_instances_slice=min_instances_slice,
                           threshold=threshold)

    # Wall-clock duration of the learn_classifier call, in seconds
    duration = time.time() - start_time
    print('\033[1mFinished training after %.3f sec.\033[0m' % duration)

    # Save model
    # NOTE(review): absolute, machine-specific output directory; this will
    # presumably fail on any machine without this path. Consider making it
    # configurable.
    output_path = "/home/ml-mrothermel/projects/Interpreting-SPNs/output/spns"
    file_name = "mnist_spn_99.pckl"
    save_object_to(spn, output_path + "/" + file_name)
示例#8
0
if __name__ == '__main__':

    def create_leaf(data, ds_context, scope):
        """Build a piecewise leaf with isotonic off and no prior weight."""
        return create_piecewise_leaf(
            data, ds_context, scope, isotonic=False, prior_weight=None)

    def learn_wrapper(data, ds_context):
        """Learn an MSPN using ``create_leaf`` leaves and slices of >= 100."""
        return learn_mspn(
            data,
            ds_context,
            min_instances_slice=100,
            leaves=create_leaf,
            memory=memory)

    # Load the CSV without its header row, keeping columns 0, 1 and 3
    data = genfromtxt(
        '20180511-for-SPN.csv', delimiter=',', skip_header=True)[:, [0, 1, 3]]

    print(data)

    # Two real-valued columns followed by one discrete column
    ds_context = Context(
        meta_types=[MetaType.REAL, MetaType.REAL, MetaType.DISCRETE])
    # ds_context.parametric_type = [Gaussian, Gaussian, Categorical]
    ds_context.add_domains(data)

    # Column 2 is passed as the label index
    spn = learn_classifier(data, ds_context, learn_wrapper, 2)

    print("learned")

    plot_density(spn, data)
#
# Here, we model our problem as containing 3 features: two Gaussians for the
# coordinates and one Categorical for the label. We specify that the label is
# in column 2, and create the corresponding SPN.

train_data = np.c_[
    np.r_[np.random.normal(5, 1, (500, 2)),
          np.random.normal(10, 1, (500, 2))],
    np.r_[np.zeros((500, 1)), np.ones((500, 1))],
]

# Pass x and y by keyword: seaborn 0.12 and later accept only ``data``
# positionally, so positional x/y raise a TypeError there.
sns.scatterplot(x=train_data[:, 0], y=train_data[:, 1], hue=train_data[:, 2])

# %%
# We can learn an SPN from the training data:

spn_classification = learn_classifier(
    train_data,
    Context(parametric_types=[Gaussian, Gaussian, Categorical]).add_domains(
        train_data), learn_parametric, 2)

from spn.io.Graphics import draw_spn

draw_spn(spn_classification)

# %%
# Now, imagine we want to classify two instances, one located at :math:`(3,4)`
# and another one at :math:`(12,18)`. To do that, we first create an array with
# two rows and 3 columns. We set the last column to ``np.nan`` to indicate
# that we don't know the labels. And we set the rest of the values in the 2D
# array accordingly.

test_data = np.array([3.0, 4.0, np.nan, 12.0, 18.0, np.nan]).reshape(-1, 3)
示例#10
0
import matplotlib.pyplot as plt

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Prepare data: split iris 60/40 and attach the labels as the last column
iris = datasets.load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.4, random_state=42)
train_data_with_labels = np.insert(
    X_train, obj=X_train.shape[1], values=y_train, axis=1)
test_data_with_labels = np.insert(
    X_test, obj=X_test.shape[1], values=y_test, axis=1)

# Learn SPN: four Gaussian columns followed by the Categorical label (column 4)
context = Context(
    parametric_types=[Gaussian] * 4 + [Categorical],
).add_domains(train_data_with_labels)
spn_classification = learn_classifier(
    train_data_with_labels, context, learn_parametric, 4)

# Plot SPN
plot_spn(spn_classification, 'images/iris_spn.png')

# Predict
# Copy the true labels first: the next step overwrites that column in place.
true_values = test_data_with_labels[:, -1].copy()
# items_to_predict is the same array object as test_data_with_labels, so
# blanking the label column here also blanks it in test_data_with_labels.
items_to_predict = test_data_with_labels
items_to_predict[:, 4] = np.nan
predicted_values = mpe(spn_classification, items_to_predict)
predicted_labels = predicted_values[:, 4]

acc = accuracy_score(true_values, predicted_labels)
print(acc)