def test_KMN_l2_regularization(self):
    """An L2-regularized KMN should approximate the broad Gaussian at least
    as well as an unregularized one (regularization curbs overfitting here)."""
    mu, std = 5, 5
    X, Y = self.get_samples(mu=mu, std=std, n_samples=500)

    # Identical setup except for the l2_reg strength (and hidden layers on the
    # regularized model, matching the original configuration).
    kmn_no_reg = KernelMixtureNetwork(
        "kmn_no_reg", 1, 1, n_centers=10, n_training_epochs=200,
        l2_reg=0.0, weight_normalization=False)
    kmn_reg_l2 = KernelMixtureNetwork(
        "kmn_reg_l2", 1, 1, n_centers=10, hidden_sizes=(16, 16),
        n_training_epochs=200, l2_reg=1.0, weight_normalization=False)

    kmn_no_reg.fit(X, Y)
    kmn_reg_l2.fit(X, Y)

    # Evaluate mean absolute pdf error on a grid of y values at x = mu.
    y = np.arange(mu - 3 * std, mu + 3 * std, 6 * std / 20)
    x = np.asarray([mu] * y.shape[0])
    p_true = norm.pdf(y, loc=mu, scale=std)

    err_no_reg = np.mean(np.abs(kmn_no_reg.pdf(x, y) - p_true))
    err_reg_l2 = np.mean(np.abs(kmn_reg_l2.pdf(x, y) - p_true))
    self.assertLessEqual(err_reg_l2, err_no_reg)
def test_KMN_with_2d_gaussian_2(self):
    """Fit a KMN (agglomerative center sampling, data normalization) on a
    wide Gaussian and check pdf and cdf against scipy within 0.1 MAE."""
    mu, std = 200, 23
    X, Y = self.get_samples(mu=mu, std=std)

    for method in ["agglomerative"]:
        with tf.Session() as sess:
            model = KernelMixtureNetwork(
                "kmn2_" + method, 1, 1,
                center_sampling_method=method,
                n_centers=10,
                hidden_sizes=(16, 16),
                init_scales=np.array([1.0]),
                train_scales=True,
                data_normalization=True)
            model.fit(X, Y)

            # Query grid: y spans +/- 3 std around the mean, conditioned on x = mu.
            y = np.arange(mu - 3 * std, mu + 3 * std, 6 * std / 20)
            x = np.asarray([mu] * y.shape[0])

            p_est = model.pdf(x, y)
            p_true = norm.pdf(y, loc=mu, scale=std)
            self.assertLessEqual(np.mean(np.abs(p_true - p_est)), 0.1)

            p_est = model.cdf(x, y)
            p_true = norm.cdf(y, loc=mu, scale=std)
            self.assertLessEqual(np.mean(np.abs(p_true - p_est)), 0.1)
def test2_KMN_with_2d_gaussian_noise_x(self):
    """Training with x-noise should smooth the conditional density over x:
    the pdf gap between x=2 and x=4 must shrink by at least a factor of 2
    relative to a noise-free model."""
    np.random.seed(22)
    X = np.random.uniform(0, 6, size=4000)
    Y = X + np.random.normal(0, 1, size=4000)

    x_test_2 = np.ones(100) * 2
    x_test_4 = np.ones(100) * 4
    y_test = np.linspace(1, 5, num=100)

    with tf.Session():
        # Baseline: no noise regularization.
        model_no_noise = KernelMixtureNetwork(
            "kmn_no_noise_x", 1, 1, n_centers=5,
            x_noise_std=None, y_noise_std=None)
        model_no_noise.fit(X, Y)
        pdf_distance_no_noise = np.mean(np.abs(
            model_no_noise.pdf(x_test_2, y_test)
            - model_no_noise.pdf(x_test_4, y_test)))

        # Same model, but with Gaussian noise of std 2 added to x during training.
        model_noise = KernelMixtureNetwork(
            "kmn_noise_x", 1, 1, n_centers=5,
            x_noise_std=2, y_noise_std=None)
        model_noise.fit(X, Y)
        pdf_distance_noise = np.mean(np.abs(
            model_noise.pdf(x_test_2, y_test)
            - model_noise.pdf(x_test_4, y_test)))

        print("Training w/o noise - pdf distance:", pdf_distance_no_noise)
        print("Training w/ noise - pdf distance", pdf_distance_noise)

        self.assertGreaterEqual(pdf_distance_no_noise / pdf_distance_noise, 2.0)
def test_1_KMN_with_2d_gaussian_fit_by_crossval(self):
    """Cross-validated fitting should select n_centers=10 from the grid and
    still approximate the Gaussian pdf within 0.2 MAE."""
    X, Y = self.get_samples()

    param_grid = {
        "n_centers": [3, 10],
        "center_sampling_method": ["k_means"],
        "keep_edges": [True],
    }

    # NOTE(review): unlike sibling tests, no name/ndim args are passed here —
    # presumably the constructor provides defaults; verify against its signature.
    model = KernelMixtureNetwork(center_sampling_method="k_means", n_centers=20)
    model.fit_by_cv(X, Y, param_grid=param_grid)

    y = np.arange(-1, 5, 0.5)
    x = np.asarray([2] * y.shape[0])
    p_est = model.pdf(x, y)
    p_true = norm.pdf(y, loc=2, scale=1)

    self.assertEqual(model.get_params()["n_centers"], 10)
    self.assertLessEqual(np.mean(np.abs(p_true - p_est)), 0.2)
def testPickleUnpickleKDN(self):
    """A fitted model must survive a pickle round-trip across a TF graph
    reset: pdf values before and after must agree to ~2 decimal places."""
    X, Y = self.get_samples()

    with tf.Session() as sess:
        model = KernelMixtureNetwork(
            "kde", 2, 2, n_centers=10, n_training_epochs=10,
            data_normalization=True, weight_normalization=True)
        model.fit(X, Y)
        pdf_before = model.pdf(X, Y)

        # Serialize while the original graph/session is still alive.
        dump_string = pickle.dumps(model)

    # Wipe the default graph so unpickling must rebuild everything itself.
    tf.reset_default_graph()

    with tf.Session() as sess:
        model_loaded = pickle.loads(dump_string)
        pdf_after = model_loaded.pdf(X, Y)

        diff = np.sum(np.abs(pdf_after - pdf_before))
        self.assertAlmostEqual(diff, 0, places=2)
def test_KMN_log_pdf(self):
    """exp(log_pdf) must match pdf to within 1e-3 MAE, with and without
    data normalization."""
    X, Y = np.random.normal(size=(1000, 3)), np.random.normal(size=(1000, 2))

    for data_norm in [True, False]:
        with tf.Session() as sess:
            model = KernelMixtureNetwork(
                "kmn_logprob" + str(data_norm), 3, 2,
                n_centers=5,
                hidden_sizes=(8, 8),
                init_scales=np.array([0.5]),
                n_training_epochs=10,
                data_normalization=data_norm)
            model.fit(X, Y)

            # Fresh query points, same dimensionality as training data.
            x, y = np.random.normal(size=(1000, 3)), np.random.normal(size=(1000, 2))
            prob = model.pdf(x, y)
            log_prob = model.log_pdf(x, y)
            self.assertLessEqual(np.mean(np.abs(prob - np.exp(log_prob))), 0.001)
def plot_fitted_distribution():
    """Fit a KernelMixtureNetwork on synthetic linear-Gaussian data, print its
    test score, and draw 1D conditional-density slices plus a 3D pdf surface.

    Fixes vs. the original:
    - ``n_samples`` was only assigned inside a commented-out block but used by
      ``np.linspace`` below, raising a NameError; it is now defined explicitly.
    - ``fig.gca(projection='3d')`` was removed in matplotlib >= 3.6; replaced
      with the equivalent ``fig.add_subplot(projection='3d')``.
    - Removed the unused ``n_features`` local and dead commented-out code.
    """
    n_observations = 1000  # number of data points

    np.random.seed(22)
    # NOTE(review): these simulated arrays are immediately overwritten below;
    # the call is kept only because it may advance the global RNG state.
    X_train, X_test, Y_train, Y_test = econ_density.simulate(n_observations)

    model = KernelMixtureNetwork()

    # Synthetic linear-Gaussian data: Y = 3*X + eps.
    X_train = np.random.normal(loc=0, size=[n_observations, 1])
    Y_train = 3 * X_train + np.random.normal(loc=0, size=[n_observations, 1])
    X_test = np.random.normal(loc=0, size=[100, 1])
    Y_test = 3 * X_test + np.random.normal(loc=0, size=[100, 1])

    model.fit(X_train, Y_train)
    print(model.score(X_test, Y_test))

    # --- 1D conditional density slices ---
    n_samples = 1000
    Y_plot = np.linspace(-10, 10, num=n_samples)

    # Slice at x = -1.
    X_plot = np.expand_dims(np.asarray([-1 for _ in range(n_samples)]), axis=1)
    result = model.pdf(X_plot, Y_plot)
    plt.plot(Y_plot, result)

    # Slice at x = 2.
    X_plot = np.expand_dims(np.asarray([2 for _ in range(n_samples)]), axis=1)
    result = model.pdf(X_plot, Y_plot)
    plt.plot(Y_plot, result)
    plt.show()

    # --- 3D pdf surface over a (x, y) grid ---
    n_samples = 100
    linspace_x = np.linspace(-15, 15, num=n_samples)
    linspace_y = np.linspace(-15, 15, num=n_samples)
    X, Y = np.meshgrid(linspace_x, linspace_y)
    X, Y = X.flatten(), Y.flatten()

    Z = model.pdf(X, Y)

    X, Y, Z = (X.reshape([n_samples, n_samples]),
               Y.reshape([n_samples, n_samples]),
               Z.reshape([n_samples, n_samples]))
    fig = plt.figure()
    # fig.gca(projection='3d') was removed in matplotlib 3.6.
    ax = fig.add_subplot(projection='3d')
    surf = ax.plot_surface(X, Y, Z, cmap=cm.coolwarm, linewidth=0,
                           antialiased=True)
    plt.show()
# --- Demo: fit a KMN on skew-normal data and query it ---
seed = 22
density_simulator = SkewNormal(random_seed=seed)
X, Y = density_simulator.simulate(n_samples=3000)

""" fit density model """
model = KernelMixtureNetwork(
    "KDE_demo", ndim_x=1, ndim_y=1, n_centers=50,
    x_noise_std=0.2, y_noise_std=0.1, random_seed=22)
model.fit(X, Y)

""" query the conditional pdf and cdf"""
x_cond = np.zeros((1, 1))
y_query = np.ones((1, 1)) * 0.1
prob = model.pdf(x_cond, y_query)
cum_prob = model.cdf(x_cond, y_query)

""" compute conditional moments & VaR  """
x_cond = np.zeros((1, 1))
mean = model.mean_(x_cond)[0][0]
std = model.std_(x_cond)[0][0]
skewness = model.skewness(x_cond)[0]
VaR = model.value_at_risk(x_cond, alpha=0.01)[0]

print("Mean:", mean)
print("Std:", std)
print("Skewness:", skewness)
print("Value-at-Risk", VaR)

""" plot the fitted distribution """
x_cond_plot = np.array([-0.5, 0, 0.5])