def test_eval_parametric(self):
    """Compare Python and TF log-likelihoods on a product of parametric leaves.

    Also converts a copy of the SPN to a TF graph and back, and checks that
    the copy still prints the same equation as the original.
    """
    # A single row holding the value 1 for each of the seven scopes.
    sample = np.array([1] * 7, dtype=np.float32).reshape((1, 7))

    spn = (Gaussian(mean=1.0, stdev=1.0, scope=[0]) *
           Exponential(l=1.0, scope=[1]) *
           Gamma(alpha=1.0, beta=1.0, scope=[2]) *
           LogNormal(mean=1.0, stdev=1.0, scope=[3]) *
           Poisson(mean=1.0, scope=[4]) *
           Bernoulli(p=0.6, scope=[5]) *
           Categorical(p=[0.1, 0.2, 0.7], scope=[6]))

    expected_ll = log_likelihood(spn, sample)
    actual_ll = eval_tf(spn, sample)
    self.assertTrue(np.allclose(expected_ll, actual_ll))

    # Round trip: SPN copy -> TF graph -> back into the copy.
    duplicate = Copy(spn)
    tf_graph, data_placeholder, variable_dict = spn_to_tf_graph(
        duplicate, sample, 1)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        tf_graph_to_spn(variable_dict)

    self.assertEqual(spn_to_str_equation(spn),
                     spn_to_str_equation(duplicate))
def execute_tf():
    """Time repeated TensorFlow evaluation of the SPN on ``test_data``.

    ``spn``, ``test_data``, ``spn_to_tf_graph``, ``time`` and ``np`` are
    taken from the enclosing scope.

    The per-iteration Chrome-timeline export that used to follow the
    ``sess.run`` call sat behind an unconditional ``continue`` and could
    never execute; it has been removed together with the locals and imports
    that only it used.

    Returns:
        tuple: ``(np.log(tf_ll), avg_ns)`` where ``tf_ll`` is the output of
        the last run and ``avg_ns`` is the wall-clock time of the whole
        loop in nanoseconds divided by ``n_repeats - 1``.
    """
    import tensorflow as tf

    tf.reset_default_graph()

    data_placeholder = tf.placeholder(tf.int32, test_data.shape)
    tf_graph = spn_to_tf_graph(spn, data_placeholder, log_space=False)

    n_repeats = 1000
    tfstart = time.perf_counter()
    with tf.Session() as sess:
        for _ in range(n_repeats):
            # Full tracing is kept so the measured workload matches earlier
            # runs of this benchmark.
            run_options = tf.RunOptions(
                trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()
            sess.run(tf.global_variables_initializer())
            tf_ll = sess.run(tf_graph,
                             feed_dict={data_placeholder: test_data},
                             options=run_options,
                             run_metadata=run_metadata)
        tfend = time.perf_counter()

    # perf_counter() returns seconds; convert to nanoseconds.
    tfelapsed = (tfend - tfstart) * 1000000000

    # NOTE(review): the loop times all n_repeats runs but the average divides
    # by n_repeats - 1 (a leftover from skipping the slow first run). Kept
    # as-is so reported numbers stay comparable -- confirm intent.
    return np.log(tf_ll), tfelapsed / (n_repeats - 1)
def convert_spn_to_tf_graph(spn, test_data, batch_size, dtype=None):
    """Convert an SPN via ``spn_to_tf_graph`` and report how long it took.

    Prints a start banner, forwards all arguments to ``spn_to_tf_graph``,
    prints the elapsed wall-clock time and returns the three values that
    ``spn_to_tf_graph`` produced (root, data placeholder, variable dict).
    """
    print('\033[1mStart SPN conversion into tf.Tensor...\033[0m')
    began = time.time()

    root, placeholder, variables = spn_to_tf_graph(
        spn, test_data, batch_size=batch_size, dtype=dtype)

    elapsed = time.time() - began
    print('\033[1mFinished conversion after %.3f sec.\033[0m' % elapsed)
    return root, placeholder, variables
def test_torch_vs_tf_time(self):
    """Print the wall-clock time of optimising one small SPN in TF and PyTorch.

    Builds a two-component Gaussian mixture over two features, converts it
    to a TensorFlow graph and to a PyTorch module, runs the same number of
    Adam epochs on each and prints both durations. Nothing is asserted.
    """
    # make_blobs is imported from the public ``sklearn.datasets`` package;
    # the private ``sklearn.datasets.samples_generator`` module used before
    # is not available in current scikit-learn releases.
    from sklearn.datasets import make_blobs
    import tensorflow as tf
    from time import time

    # Create sample data (the cluster labels are not needed).
    X, _ = make_blobs(n_samples=10, centers=3, n_features=2, random_state=0)
    X = X.astype(np.float32)

    # SPFlow implementation
    g00 = Gaussian(mean=0.0, stdev=1.0, scope=0)
    g10 = Gaussian(mean=1.0, stdev=2.0, scope=1)
    g01 = Gaussian(mean=3.0, stdev=2.0, scope=0)
    g11 = Gaussian(mean=5.0, stdev=1.0, scope=1)
    p0 = Product(children=[g00, g10])
    p1 = Product(children=[g01, g11])
    s = Sum(weights=[0.2, 0.8], children=[p0, p1])
    assign_ids(s)
    rebuild_scopes_bottom_up(s)

    # Convert
    tf_spn, data_placeholder, variable_dict = spn_to_tf_graph(s, data=X)
    torch_spn = SumNode.from_spn(s)

    # Optimizer
    lr = 0.001
    tf_optim = tf.train.AdamOptimizer(lr)
    torch_optim = optim.Adam(torch_spn.parameters(), lr)

    epochs = 10
    t0 = time()
    optimize_tf_graph(tf_spn, variable_dict, data_placeholder, X,
                      epochs=epochs, optimizer=tf_optim)
    t1 = time()
    optimize_torch(torch_spn, X, epochs=epochs, optimizer=torch_optim)
    t2 = time()

    print("Tensorflow took: ", t1 - t0)
    print("PyTorch took: ", t2 - t1)
def test_eval_gaussian(self):
    """Learned Gaussian SPN: TF log-likelihood must match the Python one."""
    np.random.seed(17)
    # Two normal samples of 2000 values each, drawn in this order so the
    # seeded random stream is consumed exactly as before.
    narrow = np.random.normal(10, 0.01, size=2000)
    wide = np.random.normal(30, 10, size=2000)
    data = np.concatenate([narrow, wide]).reshape((-1, 10))

    n_cols = data.shape[1]
    ds_context = Context(meta_types=[MetaType.REAL] * n_cols,
                         parametric_types=[Gaussian] * n_cols)
    spn = learn_parametric(data, ds_context)

    expected_ll = log_likelihood(spn, data)

    tf_graph, data_placeholder, variable_dict = spn_to_tf_graph(spn, data)
    actual_ll = eval_tf(tf_graph, data_placeholder, data)

    self.assertTrue(np.allclose(expected_ll, actual_ll))
def run_tf(spflow_spn, n_feats, batch_size, repetitions):
    """Return the mean time of one TF forward pass over random batches.

    The SPN is converted to a TF graph, evaluated ten times on a fixed
    random batch as warm-up, and then evaluated ``repetitions`` times on a
    fresh random ``(batch_size, n_feats)`` float32 batch each time. Only the
    ``sess.run`` calls of the second phase are timed.
    """
    print("Running TF with: nfeat=%s, batch=%s" % (n_feats, batch_size))

    warmup_batch = np.random.rand(batch_size, n_feats).astype(np.float32)
    graph, feed_placeholder, _ = spn_to_tf_graph(
        spflow_spn, warmup_batch, dtype=np.float32)

    total = 0.0
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # warmup:
        for _ in range(10):
            sess.run(graph, feed_dict={feed_placeholder: warmup_batch})

        for _ in tqdm(range(repetitions), desc="Repetition loop"):
            batch = np.random.rand(batch_size, n_feats).astype(np.float32)
            started = time()
            sess.run(graph, feed_dict={feed_placeholder: batch})
            total += time() - started

        mean_time = total / repetitions
    return mean_time
def test_optimization(self):
    """Fifty Adam steps in TF must raise the likelihood and sync back to the SPN."""
    np.random.seed(17)
    # Two normal samples of 2000 values each, drawn in this order so the
    # seeded random stream is consumed exactly as before.
    narrow = np.random.normal(10, 0.01, size=2000)
    wide = np.random.normal(30, 10, size=2000)
    data = np.concatenate([narrow, wide]).reshape((-1, 10))
    data = data.astype(np.float32)

    n_cols = data.shape[1]
    ds_context = Context(meta_types=[MetaType.REAL] * n_cols,
                         parametric_types=[Gaussian] * n_cols)
    spn = learn_parametric(data, ds_context)

    # Overwrite the learned root weights before measuring the baseline.
    spn.weights = [0.8, 0.2]
    baseline_ll = log_likelihood(spn, data)

    tf_graph, data_placeholder, variable_dict = spn_to_tf_graph(spn, data)
    loss = likelihood_loss(tf_graph)
    train_step = tf.train.AdamOptimizer(0.001).minimize(loss)

    feed = {data_placeholder: data}
    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        for _ in range(50):
            session.run(train_step, feed_dict=feed)

        tf_ll_opt = session.run(tf_graph, feed_dict=feed).reshape(-1, 1)

        # Copy the optimised TF variables back into the SPN while the
        # session is still open.
        tf_graph_to_spn(variable_dict)

    py_ll_opt = log_likelihood(spn, data)

    self.assertTrue(np.allclose(tf_ll_opt, py_ll_opt))
    self.assertLess(baseline_ll.sum(), tf_ll_opt.sum())
def test_eval_histogram(self):
    """Learned discrete MSPN: TF log-likelihood must match the Python one."""
    np.random.seed(17)
    # Two normal samples of 2000 values each, drawn in this order so the
    # seeded random stream is consumed exactly as before.
    narrow = np.random.normal(10, 0.01, size=2000)
    wide = np.random.normal(30, 10, size=2000)
    samples = np.concatenate([narrow, wide]).reshape((-1, 10))

    # Replace negative values with zero, then truncate to integers.
    samples[samples < 0] = 0
    data = samples.astype(int)

    ds_context = Context(meta_types=[MetaType.DISCRETE] * data.shape[1])
    ds_context.add_domains(data)

    spn = learn_mspn(data, ds_context)

    expected_ll = log_likelihood(spn, data)

    tf_graph, data_placeholder, variable_dict = spn_to_tf_graph(spn, data)
    actual_ll = eval_tf(tf_graph, data_placeholder, data)

    self.assertTrue(np.allclose(expected_ll, actual_ll))
if __name__ == '__main__':
    # Manual check: evaluate a small histogram SPN in Python and through the
    # TF graph and print whether both log-likelihoods agree.
    add_histogram_inference_support()

    np.random.seed(17)
    data = np.random.normal(10, 0.01, size=2000).tolist() + np.random.normal(
        30, 10, size=2000).tolist()
    data = np.array(data).reshape((-1, 10))
    data[data < 0] = 0
    data = (data * 1).astype(int)

    ds_context = Context(meta_types=[MetaType.DISCRETE] * data.shape[1])
    ds_context.add_domains(data)

    # Make the first two columns constant (after the domains were recorded).
    data[:, 0] = 0
    data[:, 1] = 1

    # NOTE(review): the next two ``spn`` values are overwritten before being
    # evaluated; the calls are kept in case they are wanted as smoke checks
    # -- confirm whether they can be dropped.
    spn = learn(data, ds_context)
    spn = (create_histogram_leaf(data[:, 0].reshape((-1, 1)), ds_context, [0],
                                 alpha=False, hist_source="kde") *
           create_histogram_leaf(data[:, 1].reshape((-1, 1)), ds_context, [1],
                                 alpha=False, hist_source="kde"))

    # The SPN that is actually evaluated: a 0.3 / 0.7 mixture of two
    # histogram leaves built from column 0.
    spn = (0.3 * create_histogram_leaf(data[:, 0].reshape((-1, 1)),
                                       ds_context, [0], alpha=False,
                                       hist_source="kde") +
           0.7 * create_histogram_leaf(data[:, 0].reshape((-1, 1)),
                                       ds_context, [0], alpha=False,
                                       hist_source="kde"))

    py_ll = log_likelihood(spn, data)

    # Other call sites unpack three values (graph, placeholder, variable
    # dict) from spn_to_tf_graph; star-unpacking accepts that form as well
    # as a two-value return instead of raising ValueError.
    tf_graph, placeholder, *_ = spn_to_tf_graph(spn, data)

    log_tf_out = eval_tf(tf_graph, placeholder, data)

    print("results are similar for Log TF and Python?",
          np.all(np.isclose(py_ll, log_tf_out)))