def test_sampling():
    """Smoke-test plain and range-conditioned sampling on the gender SPN.

    Every call passes ``random_seed=1`` and draws 10 samples. The samples
    are only printed; nothing is asserted.
    """
    spn = example_spns.get_gender_spn()

    # Sampling without any range condition.
    print(fn.sampling(spn, n_samples=10, random_seed=1))

    # Sampling under a range condition; ``None`` entries put no restriction
    # on the corresponding position.
    conditions = (
        [None, None, None, None],
        [None, None, NumericRange([[10, 11], [29, 30]])],
        [NominalRange([0]), None, NumericRange([[14, 15], [29, 30]])],
    )
    for rang in conditions:
        print(fn.sampling_rang(spn, rang=rang, n_samples=10, random_seed=1))
def test_sample_range(self):
    """Check range-restricted sampling from a static leaf holding the value 20."""
    node = create_static_leaf(20, [0])

    # Without ranges the samples must average to the stored value.
    samples = SamplingRange.sample_static_node(node, 10)
    self.assertAlmostEqual(np.average(samples), 20)

    # Ranges that contain 20 (boundaries included) must still yield 20.
    containing_20 = (
        [[20, 20.321]],
        [[19, 20]],
        [[19, 19.5], [19.999, 20.111], [20.5, 21]],
    )
    for intervals in containing_20:
        ranges = np.array([NumericRange(intervals)])
        samples = SamplingRange.sample_static_node(node, 10, ranges=ranges)
        self.assertAlmostEqual(np.average(samples), 20)

    # A range that misses 20 is expected to produce NaN for every sample.
    ranges = np.array([NumericRange([[19, 19.5]])])
    samples = SamplingRange.sample_static_node(node, 10, ranges=ranges)
    self.assertTrue(all(np.isnan(samples)))
def test_sample_range(self):
    """Check range-restricted sampling from a piecewise-linear leaf.

    The leaf is fitted to 1000 seeded draws from a normal distribution
    (mean 20, scale 5); sampling uses a fixed ``RandomState(100)``.
    """
    np.random.seed(10)
    data = np.random.normal(20, scale=5, size=1000).reshape((1000, 1))
    numpy_data = np.array(data, np.float64)
    meta_types = [MetaType.REAL]
    domains = [[np.min(numpy_data[:, 0]), np.max(numpy_data[:, 0])]]
    ds_context = Context(meta_types=meta_types, domains=domains)
    rand_gen = np.random.RandomState(100)
    pwl = create_piecewise_leaf(data, ds_context, scope=[0], prior_weight=None)

    # Point range: every sample has to be the point itself.
    ranges = np.array([NumericRange([[20]])])
    samples = SamplingRange.sample_piecewise_node(pwl, 10, rand_gen, ranges)
    self.assertEqual(len(samples), 10)
    self.assertAlmostEqual(np.average(samples), 20)

    # Single interval: every sample has to lie inside it.
    # The previous form, all(samples[samples > 20]), only tested the
    # truthiness of already-filtered values and therefore could never fail.
    ranges = np.array([NumericRange([[20, 100]])])
    samples = SamplingRange.sample_piecewise_node(pwl, 10, rand_gen, ranges)
    self.assertTrue(np.all(samples >= 20))
    self.assertTrue(np.all(samples <= 100))

    # Two intervals: nothing may fall into the gap or below the lower bound.
    ranges = np.array([NumericRange([[10, 13], [20, 100]])])
    samples = SamplingRange.sample_piecewise_node(pwl, 10, rand_gen, ranges)
    self.assertFalse(np.any((samples > 13) & (samples < 20)))
    self.assertFalse(np.any(samples < 10))
def test_marg():
    """Smoke-test ``fn.marg`` / ``fn.marg_rang`` on the gender SPN.

    Each resulting SPN is plotted to ``marg<N>.pdf``; the two final
    probabilities are printed. Nothing is asserted.
    """
    spn = example_spns.get_gender_spn()

    # Marginalisation by feature-id list.
    plots = (
        ([2], "marg1.pdf"),
        ([0], "marg2.pdf"),
        ([1], "marg3.pdf"),
        ([1, 2], "marg4.pdf"),
    )
    for feature_ids, pdf_name in plots:
        fn.plot_spn(fn.marg(spn, feature_ids), pdf_name)

    # Marginalisation by range condition; the returned probability is unused.
    _, spn5 = fn.marg_rang(spn, [None, NominalRange([1]), None])
    fn.plot_spn(spn5, "marg5.pdf")

    _, spn6 = fn.marg_rang(spn, [None, NominalRange([1]), NumericRange([[10, 12]])])
    fn.plot_spn(spn6, "marg6.pdf")

    # Evaluate the same query on the original SPN and on the last result.
    rang = [NominalRange([0]), NominalRange([1]), None]
    for model in (spn, spn6):
        print(fn.prob(model, rang))
def visualize_density(spn, value_dict, rang=None, n_steps=50, max_density=None, save_path=None):
    """Plot the density of every numeric feature in the scope of ``spn``.

    Args:
        spn: Model to evaluate; its ``scope`` lists the feature ids.
        value_dict: Maps a feature id to a sequence whose item 0 is the
            feature type (only ``"numeric"`` features are plotted), item 1
            is used as the subplot title and item 2 holds the two end
            points of the evaluation grid.
        rang: Base condition that is copied for every grid point. Defaults
            to an all-``None`` array of length ``max(spn.scope) + 1``.
        n_steps: Number of grid points per feature.
        max_density: Optional upper y-limit applied to every subplot.
        save_path: If given, the figure is saved there; otherwise it is shown.
    """
    numeric_ids = [fid for fid in spn.scope if value_dict[fid][0] == "numeric"]

    if rang is None:
        rang = np.array([None] * (max(spn.scope) + 1))

    # One evaluation grid per numeric feature, reused for querying and plotting.
    grids = [
        np.linspace(value_dict[fid][2][0], value_dict[fid][2][1], num=n_steps)
        for fid in numeric_ids
    ]

    # One query per grid point: the base condition with a point range
    # placed on the feature under inspection.
    queries = []
    for fid, grid in zip(numeric_ids, grids):
        for x_val in grid:
            query = rang.copy()
            query[fid] = NumericRange([[x_val]])
            queries.append(query)

    res = fn.probs(spn, np.array(queries))

    n_plots = len(numeric_ids)
    _, axes = plt.subplots(n_plots, 1, figsize=(16, 6 * n_plots),
                           squeeze=False, sharey=True, sharex=False)

    for pos, (fid, grid) in enumerate(zip(numeric_ids, grids)):
        ax = axes[pos][0]
        # Results are laid out feature after feature, n_steps values each.
        first = n_steps * pos
        ax.plot(grid, res[first:first + n_steps])
        if max_density is not None:
            ax.set_ylim(0, max_density)
        ax.set_title(value_dict[fid][1])

    plt.tight_layout()

    if save_path is not None:
        plt.savefig(save_path)
    else:
        plt.show()
def test_inference_range(self):
    """Check ``piecewise_likelihood_range`` against fixed reference values.

    The leaf is fitted to 1000 seeded draws from a normal distribution
    (mean 20, scale 5).
    """
    np.random.seed(10)
    data = np.random.normal(20, scale=5, size=1000).reshape((1000, 1))
    column = np.array(data, np.float64)[:, 0]
    ds_context = Context(meta_types=[MetaType.REAL],
                         domains=[[np.min(column), np.max(column)]])
    pwl = create_piecewise_leaf(data, ds_context, scope=[0], prior_weight=None)

    # (intervals, expected value); single-element intervals are point queries.
    expectations = (
        ([[20]], 0.086475210674),
        ([[21]], 0.0855907611968),
        ([[19]], 0.0833451329643),
        ([[-20]], 0),
        ([[20, 100]], 0.493416517396),
        ([[-20, 20]], 0.506583482604),
        ([[-20, 100]], 1),
        ([[-20, -10]], 0),
    )
    for intervals, expected in expectations:
        ranges = np.array([[NumericRange(intervals)]])
        prob = InferenceRange.piecewise_likelihood_range(pwl, ranges)[0][0]
        self.assertAlmostEqual(prob, expected)
def test_prob():
    """Smoke-test the probability helpers of ``fn`` on the gender SPN.

    Covers single and batched range queries as well as the ``*_spflow``
    variants that take NaN-padded instances. Results are only printed.
    """
    spn = example_spns.get_gender_spn()

    # Single range queries.
    single_queries = (
        [None, None, None],
        [NominalRange([0]), NominalRange([1]), NumericRange([[20]])],
    )
    for rang in single_queries:
        print(fn.prob(spn, rang))

    # Batched range queries, one row per query.
    ranges = np.array([
        [None, None, NumericRange([[0, 20]])],
        [NominalRange([0]), None, None],
        [None, NominalRange([1]), None],
    ])
    print(fn.probs(spn, ranges))

    # Instance-based queries; NaN marks an unspecified feature.
    print(fn.prob_spflow(spn, [0, np.nan, np.nan]))

    data = np.array([[0, np.nan, np.nan], [0, 1, np.nan]])
    print(fn.probs_spflow(spn, data))
def _generate_conds(target_id, value_dict, numeric_intervals=10): conds = [] labels = [] if value_dict[target_id][0] == "discrete": for val in sorted(value_dict[target_id][2]): conds.append(NominalRange([val])) labels.append(value_dict[target_id][2][val]) elif value_dict[target_id][0] == "numeric": val_space = np.linspace(value_dict[target_id][2][0], value_dict[target_id][2][1], numeric_intervals + 1) for interval in zip(val_space[1:], val_space[:-1]): conds.append(NumericRange([list(interval)])) labels.append(str(list(interval))) else: raise Exception( "Not implemented for other than discrete or numeric ...: " + str(value_dict[target_id][0])) return conds, labels
def test_sample2():
    """Sample from a hand-made piecewise distribution and plot the result.

    Draws 100000 samples restricted to the intervals [0, 4] and [9, 12],
    prints the elapsed time, then shows the density, a histogram of the
    samples and the inverse cumulative distribution.
    """
    # Create the distribution and scale it to unit area.
    x_range = np.array([0., 2, 4., 6, 8, 10, 12])
    y_range = np.array([0., 10, 100, 30, 10, 200, 0.])
    y_range = y_range / np.trapz(y_range, x_range)

    rand_gen = np.random.RandomState(10)
    ranges = [NumericRange([[0., 4.], [9., 12.]])]

    started = time.time()
    cumulative_stats, samples = sample(x_range, y_range, ranges, 100000, rand_gen)
    elapsed = time.time() - started
    print("cum_sampling: " + str(elapsed))

    # Plot the distribution and the drawn samples.
    plt.title("Actual distribution")
    plt.plot(x_range, y_range)
    plt.show()

    plt.hist(samples, bins=50)
    plt.show()

    # Plot the inverse cumulative distribution on a grid over [0, 1].
    quantiles = np.linspace(0, 1, 100)
    inverse_vals = np.zeros(len(quantiles))
    for pos, quantile in enumerate(quantiles):
        inverse_vals[pos] = inverse_cumulative(cumulative_stats, quantile)

    plt.title("Inverse cumulative distribution")
    plt.plot(quantiles, inverse_vals)
    plt.show()
def classify_dataset(spn, target_id, df, transform=False, value_dict=None, epsilon=0.01):
    """Turn every row of ``df`` into a range query and classify ``target_id``.

    Args:
        spn: Model to query; its ``scope`` lists the feature ids.
        target_id: Feature id to predict; its column is left unconditioned.
        df: DataFrame whose columns are matched by position to the sorted
            scope of ``spn``. It is never modified.
        transform: If true, discrete columns are first mapped from labels to
            codes using the inverted label mapping of ``value_dict``.
        value_dict: Maps a feature id to a sequence whose item 0 is the
            feature type, item 1 the column name and item 2 either the
            code-to-label mapping (discrete) or the two domain end points
            (numeric). Generated from ``spn`` when omitted.
        epsilon: Half-width of the interval put around each numeric value,
            as a fraction of the feature's domain width.

    Returns:
        Whatever ``classifies`` returns for the generated range array.

    Raises:
        ValueError: If a non-target feature is neither discrete nor numeric.
    """
    if value_dict is None:
        value_dict = generate_adhoc_value_dict(spn)

    sorted_scope = sorted(spn.scope)

    if transform:
        inv_val_dict = {
            v[1]: {v2: k2 for k2, v2 in v[2].items()}
            for _, v in value_dict.items()
            if v[0] == "discrete"
        }
        # Map on a copy: assigning into the argument would silently replace
        # the labels in the caller's DataFrame.
        df = df.copy()
        for col_name, map_dict in inv_val_dict.items():
            df[col_name] = df[col_name].map(map_dict)

    values = np.array(df.values)
    # One row per instance, one slot per feature id; None means unconditioned.
    ranges = np.full(shape=(len(values), np.max(spn.scope) + 1), fill_value=None)

    for i, col in enumerate(values.T):
        f_id = sorted_scope[i]
        if f_id == target_id:
            continue

        kind = value_dict[f_id][0]
        if kind == "discrete":
            for j, v in enumerate(col):
                ranges[j, f_id] = NominalRange([v])
        elif kind == "numeric":
            domain = value_dict[f_id][2]
            bound = epsilon * (domain[1] - domain[0])
            for j, v in enumerate(col):
                ranges[j, f_id] = NumericRange([[v - bound, v + bound]])
        else:
            raise ValueError("Unknown attribute-type: " + str(kind))

    return classifies(spn, target_id, ranges, value_dict)
def visualize_Density(spn):
    """Plot per-feature densities of ``spn``, overall and per alarm state.

    For each of the first five positions, the density over [0, 1] is drawn
    three times on the same axis: for ``spn`` itself and for ``spn``
    conditioned on position 5 being 0 ("No Alarm") and 1 ("Alarm"). The
    figure is saved to ``pdp.pdf`` and shown, the SPN is plotted to
    ``pval.pdf``, and the nodes returned by ``get_nodes_with_weight`` are
    printed.
    """
    from spn.experiments.AQP.Ranges import NominalRange, NumericRange
    from spn.algorithms import Inference
    from simple_spn.InferenceRange import categorical_likelihood_range, gaussian_likelihood_range
    from spn.structure.Base import Sum, Product
    from spn.algorithms.Inference import sum_likelihood, prod_likelihood
    from spn.structure.leaves.parametric.Parametric import Gaussian, Categorical
    from simple_spn.UpdateRange import categorical_update_range
    import matplotlib.pyplot as plt

    node_likelihood = {
        Gaussian: gaussian_likelihood_range,
        Categorical: categorical_likelihood_range,
        Sum: sum_likelihood,
        Product: prod_likelihood
    }
    update_ranges = {
        Gaussian: None,
        Categorical: categorical_update_range
    }

    n_methods = 5
    max_y = 5
    x_vals = np.linspace(0.00, 1.0, num=100)

    def density_curve(model, position):
        # One point query per grid value on `position`; all other slots open.
        queries = np.array([
            [None] * position + [NumericRange([[x_val]])] + [None] * (5 - position)
            for x_val in x_vals
        ])
        return Inference.likelihood(
            model, data=queries, dtype=np.float64,
            node_likelihood=node_likelihood)[:, 0]

    def condition_on_alarm(state):
        # Condition on the last position, print the evidence probability.
        evidence = [None, None, None, None, None, NominalRange([state])]
        prob, conditioned = spn_for_evidence(
            spn, evidence, node_likelihood=node_likelihood,
            distribution_update_ranges=update_ranges)
        print(prob)
        return conditioned

    _, axes = plt.subplots(1, 5, figsize=(15, 10), squeeze=False, sharey=False, sharex=True)

    # Unconditioned densities.
    for i in range(n_methods):
        ax = axes[0][i]
        ax.plot(x_vals, density_curve(spn, i))
        ax.set_title("Method " + str(i) + " All")
        ax.set_ylim([0, max_y])

    # Densities given alarm state 0.
    spn_no_alarm = condition_on_alarm(0)
    for i in range(n_methods):
        axes[0][i].plot(x_vals, density_curve(spn_no_alarm, i), label="No Alarm", linestyle=":")

    # Densities given alarm state 1.
    spn_alarm = condition_on_alarm(1)
    for i in range(n_methods):
        axes[0][i].plot(x_vals, density_curve(spn_alarm, i), label="Alarm")

    plt.legend()
    plt.tight_layout()
    plt.savefig("pdp.pdf")
    plt.show()

    spn_util.plot_spn(spn, "pval.pdf")

    for weight, node in get_nodes_with_weight(spn, 5):
        print(str(round(node.p[1], 2)) + "\t" + str(weight))
def evaluate_numeric_density_leaf(leaf, x_vals):
    """Evaluate ``leaf`` at every value of ``x_vals`` via point-range queries.

    Each query row is padded with ``None`` so the point range sits at the
    index given by the leaf's first scope entry. Returns the result of
    ``probs`` for the stacked queries.
    """
    feature_id = leaf.scope[0]
    padding = [None] * feature_id

    rows = []
    for x in x_vals:
        rows.append(padding + [NumericRange([[x]])])

    return probs(leaf, np.array(rows))
prob_rang = prob_ranges[unique[i]] n_vals = counts[i] p_samples += list((prob_rang[1] - prob_rang[0]) * rand_gen.random_sample(size=n_vals) + prob_rang[0]) return norm.ppf(p_samples, loc=node.mean, scale=node.stdev) if __name__ == '__main__': g = Gaussian(mean=10, stdev=2, scope=[0]) samples = sample_gaussian_node(g, 5, np.random.RandomState(1), ranges=None) print(samples) ranges = np.array([NumericRange([[0,10]])]) samples = sample_gaussian_node(g, 100, np.random.RandomState(1), ranges=ranges) print(samples)
#Import inference from spn.algorithms import Inference from spn.algorithms.Inference import sum_likelihood, prod_likelihood inference_support_ranges = {PiecewiseLinear : piecewise_likelihood_range, Categorical : categorical_likelihood_range, IdentityNumeric : identity_likelihood_range, Sum : sum_likelihood, Product : prod_likelihood} #Use None instead of np.nan ranges = np.array([[None, None, None], #Without any conditions [NominalRange([0]), None, None], #Only male [NominalRange([0]), NominalRange([1]), None], #Only male and student [NominalRange([0]), NominalRange([1]), NumericRange([[21,100]])], #Only male and student and older than 21 [NominalRange([0]), NominalRange([1]), NumericRange([[10,15], [25,100]])]] #Only male and student and age between 10 and 17 or 21 and 100 ) probabilities = Inference.likelihood(root_node, ranges, dtype=np.float64, node_likelihood=inference_support_ranges) print("Probabilities:") print(probabilities) print() #Sampling for given ranges from spn.algorithms import SamplingRange from spn.structure.leaves.piecewise.SamplingRange import sample_piecewise_node from spn.structure.leaves.parametric.SamplingRange import sample_categorical_node from spn.experiments.AQP.leaves.identity.SamplingRange import sample_identity_node
def visualize_Density_2d(spn):
    """Draw contour plots of the joint density of the first two features.

    Three panels are produced on a 50 x 50 grid over [0, 1] x [0, 1]: the
    density of ``spn`` itself and of ``spn`` conditioned on position 5
    being 0 and 1. Densities are capped at 5 before plotting. The figure is
    saved to ``cdp.pdf`` and shown.
    """
    from spn.experiments.AQP.Ranges import NominalRange, NumericRange
    from spn.algorithms import Inference
    from simple_spn.InferenceRange import categorical_likelihood_range, gaussian_likelihood_range
    from simple_spn.UpdateRange import categorical_update_range
    from spn.structure.Base import Sum, Product
    from spn.algorithms.Inference import sum_likelihood, prod_likelihood
    from spn.structure.leaves.parametric.Parametric import Gaussian, Categorical
    import matplotlib.pyplot as plt

    update_ranges = {
        Gaussian: None,
        Categorical: categorical_update_range
    }
    node_likelihood = {
        Gaussian: gaussian_likelihood_range,
        Categorical: categorical_likelihood_range,
        Sum: sum_likelihood,
        Product: prod_likelihood
    }

    _, axes = plt.subplots(1, 3, figsize=(15, 10), squeeze=False, sharey=False, sharex=True)

    x_vals = np.linspace(0, 1, num=50)
    y_vals = np.linspace(0, 1, num=50)
    X, Y = np.meshgrid(x_vals, y_vals)

    def density_grid(model):
        # Evaluate the model row by row (one row per y value); the y value
        # is printed as a progress indicator.
        rows = []
        for y_val in y_vals:
            print(y_val)
            queries = np.array([
                [NumericRange([[x_val]]), NumericRange([[y_val]]), None, None, None, None]
                for x_val in x_vals
            ])
            densities = Inference.likelihood(
                model, data=queries, dtype=np.float64,
                node_likelihood=node_likelihood)[:, 0]
            # Cap the values at 5.
            densities[densities > 5] = 5
            rows.append(densities)
        return np.array(rows)

    def condition_on_alarm(state):
        # Condition on the last position, print the evidence probability.
        evidence = [None, None, None, None, None, NominalRange([state])]
        prob, conditioned = spn_for_evidence(
            spn, evidence, node_likelihood=node_likelihood,
            distribution_update_ranges=update_ranges)
        print(prob)
        return conditioned

    def draw_panel(ax, grid, title):
        ax.contour(X, Y, grid)
        ax.set_xlabel("Method1")
        ax.set_ylabel("Method2")
        ax.set_title(title)

    # Unconditioned density.
    draw_panel(axes[0][0], density_grid(spn), "Overall")

    # Density given alarm state 0.
    spn_no_alarm = condition_on_alarm(0)
    draw_panel(axes[0][1], density_grid(spn_no_alarm), "Keine Epidemie")

    # Density given alarm state 1.
    spn_alarm = condition_on_alarm(1)
    draw_panel(axes[0][2], density_grid(spn_alarm), "Epidemie")

    plt.savefig("cdp.pdf")
    plt.show()
def test_inference_range(self):
    """Check ``static_likelihood_range`` for a static leaf holding 20.

    The expected value is 1 whenever one of the intervals contains 20
    (boundaries included) and 0 otherwise.
    """
    node = create_static_leaf(20, [0])

    # (intervals, expected value); single-element intervals are point queries.
    expectations = (
        ([[20]], 1),
        ([[19.2]], 0),
        ([[20.0003]], 0),
        ([[0]], 0),
        ([[0, 10]], 0),
        ([[0, 200]], 1),
        ([[19.99999, 20.11]], 1),
        ([[19.99999, 20]], 1),
        ([[20, 20.321]], 1),
        ([[19, 19.5], [20.5, 21]], 0),
        ([[19, 19.5], [19.999, 20.111], [20.5, 21]], 1),
    )
    for intervals, expected in expectations:
        ranges = np.array([[NumericRange(intervals)]])
        prob = InferenceRange.static_likelihood_range(node, ranges)[0][0]
        self.assertAlmostEqual(prob, expected)
# Piecewise-linear density with a single peak at x = 3, scaled so that the
# area under the curve is 1.
x = [0., 1., 2., 3., 4.]
y = [0., 0., 0., 10., 0.]
x, y = np.array(x), np.array(y)
auc = np.trapz(y, x)
y = y / auc
node4 = PiecewiseLinear(x_range=x, y_range=y, bin_repr_points=x[1:-1], scope=[1])

# Combine the leaves with weights 0.49 / 0.51; node1..node3 are defined
# earlier in the script.
# NOTE(review): presumably * and + build Product and Sum nodes - confirm.
root_node = 0.49 * (node1 * node3) + 0.51 * (node2 * node4)

# Context setup, currently disabled:
#meta_types = [MetaType.DISCRETE, MetaType.REAL]
#domains = [[0,1],[0.,4.]]
#ds_context = Context(meta_types=meta_types, domains=domains)

# Likelihood and sampling function to use for each leaf type.
inference_support_ranges = {PiecewiseLinear : piecewise_likelihood_range, Categorical : categorical_likelihood_range}
node_sample = {Categorical : sample_categorical_node, PiecewiseLinear : sample_piecewise_node}

# Sample with a condition on the first feature only.
ranges = [NominalRange([0]),None]
samples = SamplingRange.sample_instances(root_node, 2, 30, rand_gen, ranges=ranges, node_sample=node_sample, node_likelihood=inference_support_ranges)#, return_Zs, return_partition, dtype)
print("Samples: " + str(samples))

# Sample with conditions on both features; the numeric one is a union of
# two intervals.
ranges = [NominalRange([0]),NumericRange([[3., 3.1], [3.5, 4.]])]
samples = SamplingRange.sample_instances(root_node, 2, 30, rand_gen, ranges=ranges, node_sample=node_sample, node_likelihood=inference_support_ranges)#, return_Zs, return_partition, dtype)
print("Samples: " + str(samples))
#plot spn fn.plot_spn(spn, "sample_spn.pdf", value_dict) #generate samples samples = fn.sampling(spn, n_samples=10, random_seed=1) print(samples) samples = fn.sampling_rang(spn, rang=[None, None, None, None], n_samples=10, random_seed=1) print(samples) samples = fn.sampling_rang( spn, rang=[None, None, NumericRange([[10, 11], [29, 30]])], n_samples=10, random_seed=1) print(samples) samples = fn.sampling_rang( spn, rang=[NominalRange([0]), None, NumericRange([[14, 15], [29, 30]])], n_samples=10, random_seed=1) print(samples) #Test probabilities rang = [None, None, None] prob = fn.prob(spn, rang)