def probs(spn, ranges):
    """Evaluate the SPN on range-based queries and return a flat 1-D array.

    ``ranges`` is converted to a numpy array, passed through
    ``Inference.likelihood`` with the module-level ``inference_support_ranges``
    dispatch table, and the (n, 1) result is flattened to shape (n,).
    """
    query = np.array(ranges)
    likelihoods = Inference.likelihood(
        spn,
        query,
        dtype=np.float64,
        node_likelihood=inference_support_ranges,
    )
    return likelihoods.reshape(len(query))
def test_inference_results(self):
    """Compare TF forward-pass likelihoods of a RAT-SPN against the
    extracted "simple" SPN evaluated with ``Inference.likelihood``; they
    must agree within 1% relative error.
    """
    # Fix both RNGs so structure sampling and the input batch are reproducible.
    np.random.seed(123)
    tf.set_random_seed(123)

    num_dims = 20
    graph = region_graph.RegionGraph(range(num_dims))
    for _ in range(10):
        graph.random_split(2, 3)

    args = RAT_SPN.SpnArgs()
    args.normalized_sums = True
    spn = RAT_SPN.RatSpn(10, region_graph=graph, name="obj-spn", args=args)

    session = tf.Session()
    session.run(tf.global_variables_initializer())

    # Run the TF graph on a random batch of 10 samples.
    batch = np.random.normal(0.0, 1.2, [10, num_dims])
    input_ph = tf.placeholder(tf.float32, [10, num_dims])
    output_tensor = spn.forward(input_ph)
    tf_output = session.run(output_tensor, feed_dict={input_ph: batch})

    # Evaluate the same batch on the extracted plain-SPN representation.
    output_nodes = spn.get_simple_spn(session)
    simple_output = np.stack(
        [Inference.likelihood(node, batch) for node in output_nodes])

    # TF outputs log-likelihoods; compare ratios against 1.
    rel_error = np.abs(simple_output / np.exp(tf_output) - 1.0)
    self.assertTrue(np.all(rel_error < 1e-2))
@author: Alejandro Molina
'''
from spn.algorithms import Inference
from spn.algorithms.StructureLearning import learn_structure
from spn.algorithms.splitting.Clustering import get_split_rows_KMeans
from spn.algorithms.splitting.RDC import get_split_cols_RDC
from spn.data.datasets import get_nips_data
from spn.structure.Base import Context
from spn.structure.leaves.Histograms import add_domains, create_histogram_leaf

if __name__ == '__main__':
    import numpy as np

    # Load the NIPS dataset (word labels plus count matrix).
    ds_name, words, data, train, _, statistical_type, _ = get_nips_data()
    print(words)
    print(data)

    # Every column is treated as discrete; domains are derived from the data.
    ds_context = Context()
    ds_context.statistical_type = np.asarray(["discrete"] * data.shape[1])
    add_domains(data, ds_context)

    # Learn the SPN structure: KMeans to split rows, RDC to split columns,
    # histogram leaves for the univariate distributions.
    spn = learn_structure(data, ds_context, get_split_rows_KMeans(), get_split_cols_RDC(), create_histogram_leaf)

    # print(to_str_equation(spn, words))
    # Sanity check: likelihood of the first 100 rows under the learned model.
    print(Inference.likelihood(spn, data[0:100, :]))
def visualize_Density_2d(spn):
    """Draw 2-D density contours over the first two features of `spn`.

    Three panels are produced: the unconditioned model, the model
    conditioned on feature 5 == 0 ("Keine Epidemie") and on
    feature 5 == 1 ("Epidemie").  The figure is written to "cdp.pdf"
    and shown.

    Fixes over the original: the duplicated `NominalRange, NumericRange`
    import is removed, the three identical grid-evaluation loops are
    collapsed into one nested helper, and the per-element clipping loop
    is replaced by `np.minimum`.
    """
    from spn.experiments.AQP.Ranges import NominalRange, NumericRange
    from spn.algorithms import Inference
    from simple_spn.InferenceRange import categorical_likelihood_range, gaussian_likelihood_range
    from simple_spn.UpdateRange import categorical_update_range
    from spn.structure.Base import Sum, Product
    from spn.algorithms.Inference import sum_likelihood, prod_likelihood
    from spn.structure.leaves.parametric.Parametric import Gaussian, Categorical
    import matplotlib.pyplot as plt

    # Per-node-type handlers for conditioning the SPN on evidence.
    distribution_update_ranges = {
        Gaussian: None,
        Categorical: categorical_update_range
    }
    # Per-node-type handlers for evaluating range queries.
    inference_support_ranges = {
        Gaussian: gaussian_likelihood_range,
        Categorical: categorical_likelihood_range,
        Sum: sum_likelihood,
        Product: prod_likelihood
    }

    x_vals = np.linspace(0, 1, num=50)
    y_vals = np.linspace(0, 1, num=50)
    X, Y = np.meshgrid(x_vals, y_vals)

    def density_grid(model):
        # Evaluate `model`'s density on the (x, y) grid; the remaining four
        # features stay unconstrained (None).  Densities are clipped at 5 to
        # keep the contour colour scale readable.
        rows = []
        for y_val in y_vals:
            print(y_val)
            ranges = np.array([
                [NumericRange([[x_val]]), NumericRange([[y_val]]),
                 None, None, None, None]
                for x_val in x_vals
            ])
            densities = Inference.likelihood(
                model, data=ranges, dtype=np.float64,
                node_likelihood=inference_support_ranges)[:, 0]
            rows.append(np.minimum(densities, 5))
        return np.array(rows)

    _, axes = plt.subplots(1, 3, figsize=(15, 10), squeeze=False,
                           sharey=False, sharex=True)

    def draw_panel(col, model, title):
        # One contour panel per (possibly conditioned) SPN.
        axes[0][col].contour(X, Y, density_grid(model))
        axes[0][col].set_xlabel("Method1")
        axes[0][col].set_ylabel("Method2")
        axes[0][col].set_title(title)

    draw_panel(0, spn, "Overall")

    # Condition on "no alarm" (feature 5 == 0).
    evidence = [None, None, None, None, None, NominalRange([0])]
    prob_no_alarm, spn_no_alarm = spn_for_evidence(
        spn, evidence, node_likelihood=inference_support_ranges,
        distribution_update_ranges=distribution_update_ranges)
    print(prob_no_alarm)
    draw_panel(1, spn_no_alarm, "Keine Epidemie")

    # Condition on "alarm" (feature 5 == 1).
    evidence = [None, None, None, None, None, NominalRange([1])]
    prob_alarm, spn_alarm = spn_for_evidence(
        spn, evidence, node_likelihood=inference_support_ranges,
        distribution_update_ranges=distribution_update_ranges)
    print(prob_alarm)
    draw_panel(2, spn_alarm, "Epidemie")

    plt.savefig("cdp.pdf")
    plt.show()
def visualize_Density(spn):
    """Plot 1-D marginal densities of the first five features of `spn`,
    overlaying the unconditioned model with versions conditioned on
    feature 5 == 0 ("No Alarm") and feature 5 == 1 ("Alarm").

    Saves the figure to "pdp.pdf", shows it, then renders the SPN
    structure to "pval.pdf" and prints nodes with weight >= 5.

    Fixes over the original: the initial `inference_support_ranges`
    containing `Gaussian: None` was dead (it was always overwritten
    before first use inside the loops), and the correct dict was rebuilt
    on every loop iteration; both dicts are now built once up front.
    The three identical marginal-evaluation loops share one helper and
    the loop-invariant `x_vals` is hoisted.
    """
    from spn.experiments.AQP.Ranges import NominalRange, NumericRange
    from spn.algorithms import Inference
    from simple_spn.InferenceRange import categorical_likelihood_range, gaussian_likelihood_range
    from spn.structure.Base import Sum, Product
    from spn.algorithms.Inference import sum_likelihood, prod_likelihood
    from spn.structure.leaves.parametric.Parametric import Gaussian, Categorical
    from simple_spn.UpdateRange import categorical_update_range
    import matplotlib.pyplot as plt

    # Per-node-type handlers for evaluating range queries.
    inference_support_ranges = {
        Gaussian: gaussian_likelihood_range,
        Categorical: categorical_likelihood_range,
        Sum: sum_likelihood,
        Product: prod_likelihood
    }
    # Per-node-type handlers for conditioning the SPN on evidence.
    distribution_update_ranges = {
        Gaussian: None,
        Categorical: categorical_update_range
    }

    space_start = 0.00
    space_end = 1.0
    steps = 100
    max_y = 5

    x_vals = np.linspace(space_start, space_end, num=steps)

    def marginal(model, feature_idx):
        # Density of `model` along feature `feature_idx` with all other
        # features unconstrained (None); 6 features total.
        ranges = np.array([
            [None] * feature_idx + [NumericRange([[x_val]])] + [None] * (5 - feature_idx)
            for x_val in x_vals
        ])
        return Inference.likelihood(
            model, data=ranges, dtype=np.float64,
            node_likelihood=inference_support_ranges)[:, 0]

    _, axes = plt.subplots(1, 5, figsize=(15, 10), squeeze=False,
                           sharey=False, sharex=True)

    for i in range(5):
        axes[0][i].plot(x_vals, marginal(spn, i))
        axes[0][i].set_title("Method " + str(i) + " All")
        axes[0][i].set_ylim([0, max_y])

    # Condition on "no alarm" (feature 5 == 0) and overlay dotted curves.
    evidence = [None, None, None, None, None, NominalRange([0])]
    prob_no_alarm, spn_no_alarm = spn_for_evidence(
        spn, evidence, node_likelihood=inference_support_ranges,
        distribution_update_ranges=distribution_update_ranges)
    print(prob_no_alarm)
    for i in range(5):
        axes[0][i].plot(x_vals, marginal(spn_no_alarm, i),
                        label="No Alarm", linestyle=":")

    # Condition on "alarm" (feature 5 == 1) and overlay solid curves.
    evidence = [None, None, None, None, None, NominalRange([1])]
    prob_alarm, spn_alarm = spn_for_evidence(
        spn, evidence, node_likelihood=inference_support_ranges,
        distribution_update_ranges=distribution_update_ranges)
    print(prob_alarm)
    for i in range(5):
        axes[0][i].plot(x_vals, marginal(spn_alarm, i), label="Alarm")

    plt.legend()
    plt.tight_layout()
    plt.savefig("pdp.pdf")
    plt.show()

    # Render the SPN structure and list heavy nodes (weight >= 5).
    spn_util.plot_spn(spn, "pval.pdf")
    tmp = get_nodes_with_weight(spn, 5)
    for (weight, node) in tmp:
        print(str(round(node.p[1], 2)) + "\t" + str(weight))
def probs_spflow(spn, data):
    """Evaluate the SPN on concrete data rows and return a flat 1-D array
    of likelihoods (one per row of ``data``)."""
    n_rows = len(data)
    likelihoods = Inference.likelihood(spn, data, dtype=np.float64)
    return likelihoods.reshape(n_rows)
inference_support_ranges = {PiecewiseLinear : piecewise_likelihood_range, Categorical : categorical_likelihood_range, IdentityNumeric : identity_likelihood_range, Sum : sum_likelihood, Product : prod_likelihood} #Use None instead of np.nan ranges = np.array([[None, None, None], #Without any conditions [NominalRange([0]), None, None], #Only male [NominalRange([0]), NominalRange([1]), None], #Only male and student [NominalRange([0]), NominalRange([1]), NumericRange([[21,100]])], #Only male and student and older than 21 [NominalRange([0]), NominalRange([1]), NumericRange([[10,15], [25,100]])]] #Only male and student and age between 10 and 17 or 21 and 100 ) probabilities = Inference.likelihood(root_node, ranges, dtype=np.float64, node_likelihood=inference_support_ranges) print("Probabilities:") print(probabilities) print() #Sampling for given ranges from spn.algorithms import SamplingRange from spn.structure.leaves.piecewise.SamplingRange import sample_piecewise_node from spn.structure.leaves.parametric.SamplingRange import sample_categorical_node from spn.experiments.AQP.leaves.identity.SamplingRange import sample_identity_node node_sample_support = {PiecewiseLinear : sample_piecewise_node, Categorical : sample_categorical_node,
def extract_rules(spn, feature_id=1):
    """Mine frequent condition sets from the SPN, print them with their
    sampled support vs. SPN support, and (unreachably, see exit() below)
    derive association rules targeting `feature_id`.
    """
    from spn.experiments.AQP.Ranges import NominalRange
    from spn.algorithms import Inference
    from simple_spn.internal.InferenceRange import categorical_likelihood_range
    from spn.structure.Base import Sum, Product
    from spn.algorithms.Inference import sum_likelihood, prod_likelihood
    from spn.structure.leaves.parametric.Parametric import Categorical

    # Likelihood handlers for range queries over categorical features.
    inference_support_ranges = {Categorical : categorical_likelihood_range,
                                Sum : sum_likelihood,
                                Product : prod_likelihood}

    freq_items = get_frequent_items(spn, min_support=0.0)
    # Filtering by feature_id is currently disabled (commented out).
    freq_items_filtered = freq_items#filter(lambda x : any(cond[0] == feature_id for cond in x[1]), freq_items)
    # Sort by support, highest first.
    freq_items_sorted = sorted(freq_items_filtered, key=lambda x: x[0], reverse=True)

    #evidence = numpy.empty((3,3,)
    # Display names per feature id: (short label, value labels).
    feature_dict = {0: ("g", ("m ", "w ")), 1: ("c", ("no ", "yes")), 2: ("s", ("no ", "yes")), 3: ("w", ("no ", "yes"))}

    freq_sets = []
    for (sup, conds) in freq_items_sorted:
        str_conds=[]
        # One range slot per feature in the SPN's scope; None = unconstrained.
        ranges = [None] * len(spn.scope)
        for cond in conds:
            ranges[cond[0]] = NominalRange([cond[1]])
            str_conds.append(feature_dict[cond[0]][0] + "=" + feature_dict[cond[0]][1][cond[1]])
        ranges = np.array([ranges])
        # SPN-estimated support of the condition set (single-row query).
        sup_spn = Inference.likelihood(spn, data=ranges, dtype=np.float64, node_likelihood=inference_support_ranges)[:,0][0]
        freq_sets.append(["(" + ", ".join(str_conds) + ")", sup, sup_spn])

    rules = sorted(freq_sets, key=lambda x : x[2], reverse=True)
    rule_df = pd.DataFrame(rules, columns=["frequent set", "s_support", "g_support"])
    io.print_pretty_table(rule_df.head(400))

    # NOTE(review): exit() terminates the whole process here, so everything
    # below is unreachable — presumably a debugging shortcut. Confirm intent.
    exit()

    rules = []
    for (sup, conds) in freq_items_sorted:
        rule_body = []
        rule_head = []
        conf = np.nan
        ranges = [None] * len(spn.scope)
        for cond in conds:
            # Conditions on the target feature go to the head, others to the body.
            if cond[0] == feature_id:
                rule_head.append(feature_dict[cond[0]][0] + "=" + feature_dict[cond[0]][1][cond[1]])
            else:
                rule_body.append(feature_dict[cond[0]][0] + "=" + feature_dict[cond[0]][1][cond[1]])
            ranges[cond[0]] = NominalRange([cond[1]])

        #Optimization possible
        ranges = np.array([ranges])
        prob_with_feature = Inference.likelihood(spn, data=ranges, dtype=np.float64, node_likelihood=inference_support_ranges)[:,0][0]
        # Drop the target-feature condition to get the body-only probability.
        ranges[0][feature_id] = None
        prob_without_feature = Inference.likelihood(spn, data=ranges, dtype=np.float64, node_likelihood=inference_support_ranges)[:,0][0]
        spn_sup = prob_without_feature
        spn_conf = prob_with_feature / prob_without_feature
        rules.append([" AND ".join(rule_body) + "-->" + " AND ".join(rule_head), sup, conf, spn_sup, spn_conf, spn_sup*spn_conf])

    # Rank rules by support * confidence.
    rules = sorted(rules, key=lambda x : x[5], reverse=True)
    rule_df = pd.DataFrame(rules, columns=["Rule", "c_Support", "c_Confidence", "spn_Support", "spn_Confidence", "score"])
    #rule_df.drop_duplicates(subset=["Rule"], keep = True, inplace = True)
    io.print_pretty_table(rule_df.head(400))

    pass
# Parse the word list from the next line of `myfile` (opened before this
# chunk); presumably strips a 2-character prefix before splitting on ';'
# — TODO confirm against the file format.
words = myfile.readline().strip()
words = words[2:]
words = words.split(';')

# print(eq)
print(words)

# Rebuild the SPN from its textual equation representation `eq`.
spn = str_to_spn(eq, words)
print(get_structure_stats(spn))
# print(Text.toJSON(spn))

# Evaluate on the saved test data.
data = np.loadtxt("40_testdata.txt", delimiter=';')
ll = Inference.likelihood(spn, data)
print(ll)
print("average LL", np.mean(ll))

# Evaluate on a fresh NIPS test split restricted to the first 40 features.
ds_name, words, data, _, _, _, _ = get_nips_data()
top_n_features = 40
train, test = train_test_split(data[:, 0:top_n_features], test_size=0.2, random_state=42)
ll = Inference.likelihood(spn, test)
print("average LL2", np.mean(ll))