示例#1
0
    def test_eval_parametric(self):
        """Compare Python and TF likelihoods of a product of parametric leaves.

        The SPN is a product of seven leaves (Gaussian, Exponential, Gamma,
        LogNormal, Poisson, Bernoulli, Categorical) over scopes 0..6 and is
        evaluated on a single row of ones. Afterwards a copy of the SPN is
        converted to a TF graph, its variables are initialised, and
        ``tf_graph_to_spn`` is called on the variable dictionary; the string
        equation of the copy must still equal that of the original.
        """
        # One sample, one column per leaf scope, every value equal to 1.
        data = np.ones((1, 7), dtype=np.float32)

        # Multiply the leaves together one at a time, left to right.
        spn = Gaussian(mean=1.0, stdev=1.0, scope=[0])
        spn = spn * Exponential(l=1.0, scope=[1])
        spn = spn * Gamma(alpha=1.0, beta=1.0, scope=[2])
        spn = spn * LogNormal(mean=1.0, stdev=1.0, scope=[3])
        spn = spn * Poisson(mean=1.0, scope=[4])
        spn = spn * Bernoulli(p=0.6, scope=[5])
        spn = spn * Categorical(p=[0.1, 0.2, 0.7], scope=[6])

        reference_ll = log_likelihood(spn, data)
        tensorflow_ll = eval_tf(spn, data)
        self.assertTrue(np.allclose(reference_ll, tensorflow_ll))

        # Round trip: convert a copy to TF (batch size 1) and hand the
        # initialised variables back to ``tf_graph_to_spn``.
        spn_copy = Copy(spn)
        _graph, _placeholder, variable_dict = spn_to_tf_graph(
            spn_copy, data, 1)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            tf_graph_to_spn(variable_dict)

        self.assertEqual(spn_to_str_equation(spn),
                         spn_to_str_equation(spn_copy))
示例#2
0
        def execute_tf():
            """Evaluate ``spn`` on ``test_data`` with TensorFlow and time it.

            ``spn`` and ``test_data`` are read from the enclosing scope. The
            default graph is reset, the SPN is converted with
            ``log_space=False`` against an int32 placeholder shaped like
            ``test_data``, and the graph is then run ``n_repeats`` times
            inside one session with full tracing enabled.

            Returns:
                tuple: ``(np.log(tf_ll), avg_time)`` where ``tf_ll`` is the
                output of the last run and ``avg_time`` is the wall-clock
                duration of the whole session block, scaled from seconds by
                1e9 and divided by ``n_repeats - 1``.
            """
            import tensorflow as tf

            tf.reset_default_graph()

            data_placeholder = tf.placeholder(tf.int32, test_data.shape)
            tf_graph = spn_to_tf_graph(spn, data_placeholder, log_space=False)

            # Create the initializer op a single time. Calling
            # tf.global_variables_initializer() inside the loop would add a
            # new op to the graph on every iteration.
            init_op = tf.global_variables_initializer()

            # The options never change between runs, so build them once.
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)

            n_repeats = 1000
            tfstart = time.perf_counter()
            with tf.Session() as sess:
                for _ in range(n_repeats):
                    # Trace metadata is still collected so the measured work
                    # stays the same, although it is not inspected here.
                    run_metadata = tf.RunMetadata()

                    # Variables are re-initialised before every run, as
                    # before, so the timed region covers the same work.
                    sess.run(init_op)
                    tf_ll = sess.run(tf_graph,
                                     feed_dict={data_placeholder: test_data},
                                     options=run_options,
                                     run_metadata=run_metadata)
            tfend = time.perf_counter()
            tfelapsed = (tfend - tfstart) * 1000000000

            # NOTE(review): all n_repeats runs fall inside the timed region,
            # yet the divisor is n_repeats - 1 -- confirm this is intended.
            return np.log(tf_ll), tfelapsed / (n_repeats - 1)
示例#3
0
def convert_spn_to_tf_graph(spn, test_data, batch_size, dtype=None):
    """Convert an SPN with ``spn_to_tf_graph`` and print how long it took.

    Args:
        spn: SPN forwarded unchanged to ``spn_to_tf_graph``.
        test_data: Data forwarded unchanged to ``spn_to_tf_graph``.
        batch_size: Forwarded as the ``batch_size`` keyword.
        dtype: Forwarded as the ``dtype`` keyword (default ``None``).

    Returns:
        tuple: ``(spn_root, data_placeholder, variable_dict)`` as produced by
        ``spn_to_tf_graph``.
    """
    print('\033[1mStart SPN conversion into tf.Tensor...\033[0m')
    began_at = time.time()

    # The actual conversion; everything else here is progress reporting.
    spn_root, data_placeholder, variable_dict = spn_to_tf_graph(
        spn, test_data, batch_size=batch_size, dtype=dtype)

    elapsed_seconds = time.time() - began_at
    print('\033[1mFinished conversion after %.3f sec.\033[0m' % elapsed_seconds)

    return spn_root, data_placeholder, variable_dict
示例#4
0
    def test_torch_vs_tf_time(self):
        """Print the wall-clock time of TF vs. PyTorch optimisation of one SPN.

        A small SPN (a sum over two products of Gaussian leaves on scopes 0
        and 1) is converted both to a TF graph and to a torch ``SumNode``.
        Each is optimised with Adam (learning rate 0.001) for 10 epochs on
        10 two-dimensional blob samples, and the two durations are printed.
        The test makes no assertions.
        """
        # ``sklearn.datasets.samples_generator`` was removed in scikit-learn
        # 0.24; the public ``sklearn.datasets`` path works on old and new
        # releases alike.
        from sklearn.datasets import make_blobs
        import tensorflow as tf
        from time import time

        # Create sample data (the blob labels are not needed).
        X, _ = make_blobs(n_samples=10,
                          centers=3,
                          n_features=2,
                          random_state=0)
        X = X.astype(np.float32)

        # SPFlow implementation
        g00 = Gaussian(mean=0.0, stdev=1.0, scope=0)
        g10 = Gaussian(mean=1.0, stdev=2.0, scope=1)
        g01 = Gaussian(mean=3.0, stdev=2.0, scope=0)
        g11 = Gaussian(mean=5.0, stdev=1.0, scope=1)
        p0 = Product(children=[g00, g10])
        p1 = Product(children=[g01, g11])
        s = Sum(weights=[0.2, 0.8], children=[p0, p1])
        assign_ids(s)
        rebuild_scopes_bottom_up(s)

        # Convert the same SPN to both back ends.
        tf_spn, data_placeholder, variable_dict = spn_to_tf_graph(s, data=X)
        torch_spn = SumNode.from_spn(s)

        # Optimizers with the same learning rate for both back ends.
        lr = 0.001
        tf_optim = tf.train.AdamOptimizer(lr)
        torch_optim = optim.Adam(torch_spn.parameters(), lr)

        t0 = time()
        epochs = 10
        optimize_tf_graph(tf_spn,
                          variable_dict,
                          data_placeholder,
                          X,
                          epochs=epochs,
                          optimizer=tf_optim)
        t1 = time()
        optimize_torch(torch_spn, X, epochs=epochs, optimizer=torch_optim)
        t2 = time()

        print("Tensorflow took: ", t1 - t0)
        print("PyTorch took: ", t2 - t1)
示例#5
0
    def test_eval_gaussian(self):
        """Check that TF evaluation matches ``log_likelihood`` on a learned SPN.

        The data are 2000 samples from N(10, 0.01) followed by 2000 samples
        from N(30, 10), arranged in rows of 10 columns. An SPN with Gaussian
        leaves is learned from them and evaluated both in Python and through
        the converted TF graph.
        """
        np.random.seed(17)
        # Draw order matters for reproducibility: narrow cluster first.
        narrow = np.random.normal(10, 0.01, size=2000)
        wide = np.random.normal(30, 10, size=2000)
        data = np.concatenate([narrow, wide]).reshape((-1, 10))

        n_columns = data.shape[1]
        ds_context = Context(meta_types=[MetaType.REAL] * n_columns,
                             parametric_types=[Gaussian] * n_columns)
        spn = learn_parametric(data, ds_context)

        expected_ll = log_likelihood(spn, data)

        tf_graph, data_placeholder, _variables = spn_to_tf_graph(spn, data)
        actual_ll = eval_tf(tf_graph, data_placeholder, data)

        self.assertTrue(np.allclose(expected_ll, actual_ll))
示例#6
0
def run_tf(spflow_spn, n_feats, batch_size, repetitions):
    """Return the mean time of one TF forward run of ``spflow_spn``.

    The SPN is converted to a TF graph using a random float32 batch of shape
    ``(batch_size, n_feats)``. After 10 untimed warm-up runs on that batch,
    the graph is run ``repetitions`` times, each on a freshly drawn random
    batch; only the ``sess.run`` call itself is timed.

    Args:
        spflow_spn: SPN passed to ``spn_to_tf_graph``.
        n_feats: Number of columns of the random input.
        batch_size: Number of rows of the random input.
        repetitions: Number of timed runs.

    Returns:
        float: Accumulated run time divided by ``repetitions``.
    """
    print("Running TF with: nfeat=%s, batch=%s" % (n_feats, batch_size))
    warmup_batch = np.random.rand(batch_size, n_feats).astype(np.float32)
    tf_graph, placeholder, _variables = spn_to_tf_graph(
        spflow_spn, warmup_batch, dtype=np.float32)

    total_seconds = 0.0
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Warm-up runs are excluded from the measurement.
        for _ in range(10):
            sess.run(tf_graph, feed_dict={placeholder: warmup_batch})

        for _ in tqdm(range(repetitions), desc="Repetition loop"):
            # Generating the batch happens outside the timed section.
            batch = np.random.rand(batch_size, n_feats).astype(np.float32)
            started = time()
            sess.run(tf_graph, feed_dict={placeholder: batch})
            total_seconds += time() - started

    return total_seconds / repetitions
示例#7
0
    def test_optimization(self):
        """Optimise a learned SPN in TF and check the result against Python.

        A Gaussian SPN is learned from two concatenated normal samples, its
        root weights are overwritten with ``[0.8, 0.2]``, and 50 Adam steps
        (learning rate 0.001) are run on ``likelihood_loss`` of the converted
        graph. After ``tf_graph_to_spn`` is called on the variable dictionary,
        the Python log-likelihood must match the TF output, and the summed
        likelihood must have increased compared with before optimisation.
        """
        np.random.seed(17)
        # Draw order matters for reproducibility: narrow cluster first.
        narrow = np.random.normal(10, 0.01, size=2000)
        wide = np.random.normal(30, 10, size=2000)
        data = np.concatenate([narrow, wide]).reshape((-1, 10))
        data = data.astype(np.float32)

        n_columns = data.shape[1]
        ds_context = Context(meta_types=[MetaType.REAL] * n_columns,
                             parametric_types=[Gaussian] * n_columns)
        spn = learn_parametric(data, ds_context)

        # Replace the learned root weights before measuring the baseline.
        spn.weights = [0.8, 0.2]
        baseline_ll = log_likelihood(spn, data)

        tf_graph, data_placeholder, variable_dict = spn_to_tf_graph(spn, data)
        loss = likelihood_loss(tf_graph)
        train_step = tf.train.AdamOptimizer(0.001).minimize(loss)

        feed = {data_placeholder: data}
        with tf.Session() as session:
            session.run(tf.global_variables_initializer())
            for _ in range(50):
                session.run(train_step, feed_dict=feed)

            optimised_tf_ll = session.run(tf_graph, feed_dict=feed)
            optimised_tf_ll = optimised_tf_ll.reshape(-1, 1)

            # Must happen while the session is still open.
            tf_graph_to_spn(variable_dict)

        optimised_py_ll = log_likelihood(spn, data)

        self.assertTrue(np.allclose(optimised_tf_ll, optimised_py_ll))
        self.assertLess(baseline_ll.sum(), optimised_tf_ll.sum())
示例#8
0
    def test_eval_histogram(self):
        """Check that TF evaluation matches ``log_likelihood`` on an MSPN.

        The data are two concatenated normal samples arranged in rows of 10
        columns, with negative values set to 0 and everything cast to ``int``.
        An SPN is learned with ``learn_mspn`` on a discrete context and
        evaluated both in Python and through the converted TF graph.
        """
        np.random.seed(17)
        # Draw order matters for reproducibility: narrow cluster first.
        narrow = np.random.normal(10, 0.01, size=2000)
        wide = np.random.normal(30, 10, size=2000)
        samples = np.concatenate([narrow, wide]).reshape((-1, 10))

        # Zero out negatives, then truncate to integers.
        data = np.where(samples < 0, 0, samples).astype(int)

        ds_context = Context(meta_types=[MetaType.DISCRETE] * data.shape[1])
        ds_context.add_domains(data)

        spn = learn_mspn(data, ds_context)

        expected_ll = log_likelihood(spn, data)

        tf_graph, data_placeholder, _variables = spn_to_tf_graph(spn, data)
        actual_ll = eval_tf(tf_graph, data_placeholder, data)

        self.assertTrue(np.allclose(expected_ll, actual_ll))
示例#9
0
if __name__ == '__main__':
    # Script entry point: build a small histogram SPN and compare its Python
    # log-likelihood with the output of the converted TF graph.
    add_histogram_inference_support()
    np.random.seed(17)
    # Draw order matters for reproducibility: narrow cluster first.
    narrow = np.random.normal(10, 0.01, size=2000)
    wide = np.random.normal(30, 10, size=2000)
    data = np.concatenate([narrow, wide]).reshape((-1, 10))
    # Zero out negatives, then truncate to integers.
    data = np.where(data < 0, 0, data)
    data = (data * 1).astype(int)

    ds_context = Context(meta_types=[MetaType.DISCRETE] * data.shape[1])
    ds_context.add_domains(data)

    # The first two columns are made constant only after the domains have
    # been registered.
    data[:, 0] = 0
    data[:, 1] = 1

    def _kde_leaf(column):
        # Histogram leaf over a single column; the scope equals the column.
        return create_histogram_leaf(data[:, column].reshape((-1, 1)),
                                     ds_context, [column],
                                     alpha=False, hist_source="kde")

    # NOTE(review): the next two results are overwritten before being used;
    # the calls are kept in case they have side effects -- confirm.
    spn = learn(data, ds_context)
    spn = _kde_leaf(0) * _kde_leaf(1)

    # Final model: a 0.3 / 0.7 mixture of two histogram leaves on column 0.
    first_term = 0.3 * _kde_leaf(0)
    second_term = 0.7 * _kde_leaf(0)
    spn = first_term + second_term

    py_ll = log_likelihood(spn, data)

    tf_graph, placeholder = spn_to_tf_graph(spn, data)
    log_tf_out = eval_tf(tf_graph, placeholder, data)

    print("results are similar for Log TF and Python?",
          np.allclose(py_ll, log_tf_out))