Example #1
0
    def test_Piecewise_expectations_with_evidence(self):
        """Conditional expectation of feature 0 given the discrete feature 1.

        Builds a two-component mixture: component L pairs feature 0 ~
        N(100, 5) with feature 1 == 0, component R pairs feature 0 ~
        N(50, 5) with feature 1 == 1.  Conditioning the expectation on
        feature 1 must therefore recover each component's sample mean.
        """
        # Component A: feature 1 is 0 (np.zeros already provides that),
        # feature 0 ~ N(100, 5).
        adata = np.zeros((20000, 2))
        adata[:, 0] = np.random.normal(loc=100.0,
                                       scale=5.00,
                                       size=adata.shape[0])

        # Component B: feature 1 fixed to 1, feature 0 ~ N(50, 5).
        bdata = np.zeros_like(adata)
        bdata[:, 1] = 1
        bdata[:, 0] = np.random.normal(loc=50.0,
                                       scale=5.00,
                                       size=bdata.shape[0])

        data = np.vstack((adata, bdata))

        ds_context = Context(meta_types=[MetaType.REAL, MetaType.DISCRETE])
        ds_context.parametric_types = [None, Categorical]
        ds_context.add_domains(data)

        # Each mixture component is a product: a piecewise leaf over the
        # real feature times a categorical leaf over the evidence feature.
        L = create_piecewise_leaf(
            adata[:, 0].reshape(-1, 1),
            ds_context,
            scope=[0],
            prior_weight=None,
            hist_source="numpy") * create_parametric_leaf(
                adata[:, 1].reshape(-1, 1), ds_context, scope=[1])
        R = create_piecewise_leaf(
            bdata[:, 0].reshape(-1, 1),
            ds_context,
            scope=[0],
            prior_weight=None,
            hist_source="numpy") * create_parametric_leaf(
                bdata[:, 1].reshape(-1, 1), ds_context, scope=[1])

        spn = 0.5 * L + 0.5 * R

        # Row 0 conditions on feature 1 == 0, row 1 on feature 1 == 1;
        # NaN marks feature 0 as the (unobserved) query variable.
        evidence = np.zeros((2, 2))
        evidence[1, 1] = 1
        evidence[:, 0] = np.nan
        expectation = Expectation(spn, {0}, evidence)

        self.assertAlmostEqual(np.mean(adata[:, 0]), expectation[0, 0], 2)
        self.assertAlmostEqual(np.mean(bdata[:, 0]), expectation[1, 0], 2)
Example #2
0
    )

    # NOTE(review): fragment — the call closed by the ")" above and the
    # learn_conditional(...) call at the bottom are both truncated here.

    # # spn
    # ds_context = Context(meta_types=[MetaType.REAL] * blocked_images[0].shape[1])
    # ds_context.add_domains(blocked_images[0])
    # ds_context.parametric_type = [Poisson] * blocked_images[0].shape[1]
    #
    # print("data ready", data.shape)
    # #the following two options should be working now.
    # # spn = learn_structure(upperimage, ds_context, get_split_rows_random_partition(np.random.RandomState(17)), get_split_cols_random_partition(np.random.RandomState(17)), create_parametric_leaf)
    # spn = learn_parametric(blocked_images[0], ds_context, min_instances_slice=0.1*len(data), ohe=False)

    # spn
    # Unconditional SPN over the 10 discrete label columns.
    ds_context = Context(meta_types=[MetaType.DISCRETE] * 10)
    ds_context.add_domains(data_labels)
    # NOTE(review): the Bernoulli list is sized from blocked_images[0], not
    # from the 10 label columns declared in meta_types — confirm they agree.
    ds_context.parametric_types = [Bernoulli] * blocked_images[0].shape[1]
    spn = learn_parametric(data_labels,
                           ds_context,
                           min_instances_slice=0.3 * len(data_labels))

    # first cspn
    # Conditional SPN: image block (dataOut) conditioned on labels (dataIn);
    # the training matrix is the concatenation [dataOut | dataIn].
    dataIn = data_labels
    dataOut = blocked_images[0]
    ds_context = Context(meta_types=[MetaType.DISCRETE] * dataOut.shape[1])
    ds_context.add_domains(dataOut)
    ds_context.parametric_types = [Conditional_Poisson] * dataOut.shape[1]

    # The CSPN's scope covers every output (image-block) column.
    scope = list(range(dataOut.shape[1]))
    print(np.shape(dataIn), np.shape(dataOut))
    print(dataIn[0], dataOut[0])
    cspn_1st = learn_conditional(np.concatenate((dataOut, dataIn), axis=1),
Example #3
0
add_histogram_inference_support()
add_parametric_inference_support()
# NOTE(review): joblib deprecated Memory's `cachedir` argument in favor of
# `location` — confirm the pinned joblib version still accepts it.
memory = Memory(cachedir="cache", verbose=0, compress=9)

# Synthetic dataset: every (x, y, z) triple in {0..9}^3 with the class
# label floor((x + y + z) / 5) appended as a fourth column.
data = []
for x in range(10):
    for y in range(10):
        for z in range(10):
            data.append([x, y, z, int((x + y + z) / 5)])
# BUG FIX: np.float was removed in NumPy 1.24; the builtin float is the
# documented replacement (np.float was merely an alias for it).
data = np.array(data).astype(float)
types = [
    MetaType.DISCRETE, MetaType.DISCRETE, MetaType.DISCRETE, MetaType.DISCRETE
]

ds_context = Context(meta_types=types)
ds_context.parametric_types = [Gaussian, Gaussian, Gaussian, Categorical]
ds_context.add_domains(data)

num_classes = len(np.unique(data[:, 3]))

#spn = learn_mspn(data, ds_context, min_instances_slice=10, leaves=create_leaf, threshold=0.3)

# Class-conditional mixture: learn one MSPN branch per class label on the
# rows of that label, weighted by the label's empirical frequency.
spn = Sum()
for label, count in zip(*np.unique(data[:, 3], return_counts=True)):
    branch = learn_mspn(data[data[:, 3] == label, :],
                        ds_context,
                        min_instances_slice=10,
                        leaves=create_leaf,
                        threshold=0.1)
    spn.children.append(branch)
    spn.weights.append(count / data.shape[0])
        datasets.append((get_blocks(images, num_blocks=num_blocks, blocks=block_ids.tolist()), 1))

    num_mpes = 1
    num_samples = 10

    # Learn one model per dataset entry: a plain parametric SPN for the
    # first block, conditional SPNs for the remaining blocks.
    cspns = []
    mpe_query_blocks = None
    sample_query_blocks = None
    for i, ((tr_block, block_idx), conditional_blocks) in enumerate(datasets):
        print("learning", i)
        # Number of leading columns that form the conditional target scope;
        # assumes tr_block's columns split evenly across block_idx — TODO confirm.
        conditional_features_count = (tr_block.shape[1] // len(block_idx)) * conditional_blocks
        if i == 0:
            # spn
            ds_context = Context(meta_types=[MetaType.REAL] * tr_block.shape[1])
            ds_context.add_domains(tr_block)
            ds_context.parametric_types = [Gaussian] * tr_block.shape[1]

            cspn = learn_parametric(tr_block, ds_context, min_instances_slice=20, ohe=False, memory=memory)
        else:
            # NOTE(review): this chained call relies on Context.add_domains
            # returning the context object — confirm.
            cspn = learn_conditional(
                tr_block,
                Context(
                    meta_types=[MetaType.REAL] * tr_block.shape[1],
                    parametric_types=[Conditional_Gaussian] * tr_block.shape[1],
                ).add_domains(tr_block),
                scope=list(range(conditional_features_count)),
                min_instances_slice=30,
                memory=memory,
            )
        cspns.append(cspn)
        print("done")
Example #5
0
def train_spn(window_size=3,
              min_instances_slice=10000,
              features=None,
              number_of_classes=3):
    """Train (or load) a class-conditional SPN and report test accuracy.

    Row layout produced by ``get_data_in_window``:
    ``window_size**2 * len(features)`` feature columns followed by
    ``window_size**2`` label columns; the class label of a row is the
    *center* label column.

    Parameters
    ----------
    window_size : int
        Side length of the square pixel window.
    min_instances_slice : int
        Passed through to ``learn_parametric``.
    features : list[int] or None
        Feature channels to use; defaults to ``[20, 120]``.
    number_of_classes : int
        Number of candidate class labels to score.

    Returns
    -------
    tuple
        ``(spn, accuracy)`` — the model and its test-set accuracy.
    """
    if features is None:
        features = [20, 120]

    add_parametric_inference_support()
    add_parametric_text_support()

    data = get_data_in_window(window_size=window_size,
                              features=features,
                              three_classes=number_of_classes == 3)

    # Center label column as a negative index: the last window_size**2
    # columns are labels and the center one sits int(w**2 / 2) in from
    # their start, i.e. -(w**2 - w**2 // 2) from the end (-5 for w == 3).
    num_label_cols = window_size * window_size
    class_col = -(num_label_cols - num_label_cols // 2)

    sss = sk.model_selection.StratifiedShuffleSplit(test_size=0.2,
                                                    train_size=0.8,
                                                    random_state=42)
    # NOTE(review): StratifiedShuffleSplit defaults to n_splits=10, so this
    # loop keeps only the final split — confirm that is intended.
    for train_index, test_index in sss.split(
            data[:, 0:window_size * window_size * len(features)],
            data[:, (window_size * window_size * len(features)) +
                 (int(window_size * window_size / 2))]):
        X_train, X_test = data[train_index], data[test_index]

    context_list = list()
    parametric_list = list()
    number_of_features = len(features)
    for _ in range(number_of_features * window_size * window_size):
        context_list.append(MetaType.REAL)
        parametric_list.append(Gaussian)

    for _ in range(window_size * window_size):
        context_list.append(MetaType.DISCRETE)
        parametric_list.append(Categorical)

    ds_context = Context(meta_types=context_list)
    ds_context.add_domains(data)
    ds_context.parametric_types = parametric_list

    spn = load_spn(window_size, features, min_instances_slice,
                   number_of_classes)
    if spn is None:
        # One branch per (label pixel, label value) pair, weighted by the
        # number of training rows that fed the branch.
        spn = Sum()
        for class_pixel in tqdm(range(-num_label_cols, 0)):
            for label, count in zip(
                    *np.unique(data[:, class_pixel], return_counts=True)):
                train_data = X_train[X_train[:, class_pixel] == label, :]
                branch = learn_parametric(
                    train_data,
                    ds_context,
                    min_instances_slice=min_instances_slice)
                spn.children.append(branch)
                spn.weights.append(train_data.shape[0])

        spn.scope.extend(branch.scope)
        spn.weights = (np.array(spn.weights) / sum(spn.weights)).tolist()

        assign_ids(spn)
        save_spn(spn, window_size, features, min_instances_slice,
                 number_of_classes)

    res = np.ndarray((X_test.shape[0], number_of_classes))

    # Score every candidate class by writing it into the center label
    # column and evaluating the joint log-likelihood.
    for i in tqdm(range(number_of_classes)):
        tmp = X_test.copy()
        # BUG FIX: previously wrote to -int(w**2 / 2) (column -4 for w==3),
        # which is NOT the center column the stratification and accuracy
        # checks use; class_col (-5 for w==3) is the correct column.
        tmp[:, class_col] = i
        res[:, i] = log_likelihood(spn, tmp)[:, 0]

    predicted_classes = np.argmax(res, axis=1).reshape((X_test.shape[0], 1))

    correct_predicted = 0
    # Compare against the true center label (previously hard-coded as -5,
    # which only matched window_size == 3).
    for x, y in zip(X_test[:, class_col], predicted_classes):
        if x == y[0]:
            correct_predicted += 1
    accuracy = correct_predicted / X_test.shape[0]
    return spn, accuracy
Example #6
0
    # Synthetic discrete data: dataIn holds 2 conditioning columns, dataOut
    # 4 target columns; training rows are concatenated as [dataOut | dataIn].
    np.random.seed(42)
    dataIn = np.random.randint(low=0, high=3, size=600).reshape(-1, 2)
    dataOut = np.random.randint(low=0, high=3, size=1200).reshape(-1, 4)
    data = np.concatenate((dataOut, dataIn), axis=1)
    assert data.shape[
        1] == dataIn.shape[1] + dataOut.shape[1], 'invalid column size'
    assert data.shape[0] == dataIn.shape[0] == dataOut.shape[
        0], 'invalid row size'

    # The context describes only the 4 output (target) columns.
    ds_context = Context(meta_types=[
        MetaType.DISCRETE, MetaType.DISCRETE, MetaType.DISCRETE,
        MetaType.DISCRETE
    ])
    ds_context.add_domains(dataOut)
    ds_context.parametric_types = [
        Conditional_Poisson, Conditional_Poisson, Conditional_Poisson,
        Conditional_Poisson
    ]

    scope = list(range(dataOut.shape[1]))

    spn = Sum()

    # One CSPN branch per distinct value of column 2, weighted by that
    # value's relative frequency.
    # NOTE(review): every branch is trained on the *full* data rather than
    # the label-filtered subset — confirm this is intended.
    for label, count in zip(*np.unique(data[:, 2], return_counts=True)):
        branch = learn_conditional(data,
                                   ds_context,
                                   scope,
                                   min_instances_slice=10000)
        spn.children.append(branch)
        spn.weights.append(count / data.shape[0])

    spn.scope.extend(branch.scope)
Example #7
0
    #
    # print(sample_instances(spn, np.array([[np.nan] * 50] * 3).reshape(-1, 50), RandomState(123)))
    #
    # 0/0

    # cspn
    # Conditional SPN: bottom half of the image conditioned on the top half.
    dataIn = upperimage
    dataOut = bottomimage

    np.random.seed(42)
    # assert data.shape[1] == dataIn.shape[1] + dataOut.shape[1], 'invalid column size'
    # assert data.shape[0] == dataIn.shape[0] == dataOut.shape[0], 'invalid row size'

    # The context covers only the output (bottom-image) columns.
    ds_context = Context(meta_types=[MetaType.DISCRETE] * dataOut.shape[1])
    ds_context.add_domains(dataOut)
    ds_context.parametric_types = [Conditional_Poisson] * dataOut.shape[1]

    scope = list(range(dataOut.shape[1]))

    # min_instances_slice is enormous — presumably to suppress all row
    # splits and keep the CSPN shallow; TODO confirm intent.
    cspn = learn_conditional(np.concatenate((dataOut, dataIn), axis=1),
                             ds_context,
                             scope,
                             min_instances_slice=60000000)

    # spn.scope.extend(branch.scope)

    print(cspn)
    plot_spn(cspn, "basicspn.png")

    # Persist the learned CSPN.
    # NOTE(review): the file handle is never closed in this fragment —
    # confirm it is closed later, or switch to a with-statement.
    fileObject = open(cspn_file, "wb")
    pickle.dump(cspn, fileObject)
Example #8
0
from spn.structure.leaves.parametric.Parametric import Poisson, Categorical
from spn.structure.leaves.parametric.Text import add_parametric_text_support

if __name__ == '__main__':
    # Register inference and text-serialization handlers for the
    # parametric leaves before any learning happens.
    add_parametric_inference_support()
    add_parametric_text_support()

    np.random.seed(42)
    data = np.random.randint(low=0, high=3, size=600).reshape(-1, 3)

    #print(data)

    # All three columns are discrete; the third one (the "label") gets a
    # Categorical leaf, the first two Poisson leaves.
    ds_context = Context(
        meta_types=[MetaType.DISCRETE, MetaType.DISCRETE, MetaType.DISCRETE])
    ds_context.add_domains(data)
    ds_context.parametric_types = [Poisson, Poisson, Categorical]

    # Build a mixture with one branch per distinct value of column 2,
    # weighted by that value's relative frequency.
    spn = Sum()

    values, counts = np.unique(data[:, 2], return_counts=True)
    n_rows = data.shape[0]
    for value, freq in zip(values, counts):
        subset = data[data[:, 2] == value, :]
        branch = learn_parametric(subset,
                                  ds_context,
                                  min_instances_slice=10000)
        spn.children.append(branch)
        spn.weights.append(freq / n_rows)

    # The mixture inherits the scope of its (structurally identical)
    # branches.
    spn.scope.extend(branch.scope)

    print(spn)

    print(spn_to_str_equation(spn))