    def test1_KMN_with_2d_gaussian_noise_y(self):
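        """Adding y-noise during training acts as a smoothness regularizer and should increase the estimated conditional variance."""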
        X, Y = self.get_samples(std=0.5)

        with tf.Session():
            model_no_noise = KernelMixtureNetwork("kmn_no_noise_y",
                                                  1,
                                                  1,
                                                  n_centers=5,
                                                  x_noise_std=None,
                                                  y_noise_std=None)
            model_no_noise.fit(X, Y)
            var_no_noise = model_no_noise.covariance(
                x_cond=np.array([[2]]))[0][0][0]

            model_noise = KernelMixtureNetwork("kmn_noise_y",
                                               1,
                                               1,
                                               n_centers=5,
                                               x_noise_std=None,
                                               y_noise_std=1)
            model_noise.fit(X, Y)
            var_noise = model_noise.covariance(x_cond=np.array([[2]]))[0][0][0]

            print("Training w/o noise:", var_no_noise)
            print("Training w/ noise:", var_noise)

            self.assertGreaterEqual(var_noise - var_no_noise, 0.1)
    def test_tail_risks_risk_mixture(self):
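        """VaR/CVaR computed from the mixture closed form should closely match the generic CDF/Monte-Carlo implementation of BaseDensityEstimator."""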
        X, Y = self.get_samples(std=0.5)
        model = KernelMixtureNetwork("kmn-var2",
                                     1,
                                     1,
                                     center_sampling_method="k_means",
                                     n_centers=5,
                                     n_training_epochs=50)
        model.fit(X, Y)

        x_cond = np.array([[0], [1]])

        VaR_mixture, CVaR_mixture = model.tail_risk_measures(x_cond,
                                                             alpha=0.07)
        VaR_cdf, CVaR_mc = BaseDensityEstimator.tail_risk_measures(model,
                                                                   x_cond,
                                                                   alpha=0.07)

        print("CVaR mixture:", CVaR_mixture)
        print("CVaR cdf:", CVaR_mc)

        diff_cvar = np.mean(np.abs(CVaR_mc - CVaR_mixture))
        self.assertAlmostEqual(diff_cvar, 0, places=1)

        diff_var = np.mean(np.abs(VaR_mixture - VaR_cdf))
        self.assertAlmostEqual(diff_var, 0, places=1)
    def test_KMN_l2_regularization(self):
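        """With L2 regularization the fitted pdf should approximate the true Gaussian at least as well as the unregularized model."""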
        mu = 5
        std = 5
        X, Y = self.get_samples(mu=mu, std=std, n_samples=500)

        kmn_no_reg = KernelMixtureNetwork("kmn_no_reg",
                                          1,
                                          1,
                                          n_centers=10,
                                          n_training_epochs=200,
                                          l2_reg=0.0,
                                          weight_normalization=False)
        kmn_reg_l2 = KernelMixtureNetwork("kmn_reg_l2",
                                          1,
                                          1,
                                          n_centers=10,
                                          hidden_sizes=(16, 16),
                                          n_training_epochs=200,
                                          l2_reg=1.0,
                                          weight_normalization=False)
        kmn_no_reg.fit(X, Y)
        kmn_reg_l2.fit(X, Y)

        y = np.arange(mu - 3 * std, mu + 3 * std, 6 * std / 20)
        x = np.asarray([mu for i in range(y.shape[0])])
        p_true = norm.pdf(y, loc=mu, scale=std)
        err_no_reg = np.mean(np.abs(kmn_no_reg.pdf(x, y) - p_true))
        err_reg_l2 = np.mean(np.abs(kmn_reg_l2.pdf(x, y) - p_true))

        self.assertLessEqual(err_reg_l2, err_no_reg)
    def test_conditional_value_at_risk_mixture(self):
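        """CVaR computed from the mixture parameters should agree with the Monte-Carlo estimate of the base class."""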
        np.random.seed(20)
        X, Y = self.get_samples(std=0.5)
        model = KernelMixtureNetwork("kmn-var",
                                     1,
                                     1,
                                     center_sampling_method="k_means",
                                     n_centers=5,
                                     n_training_epochs=500,
                                     random_seed=24)
        model.fit(X, Y)

        x_cond = np.array([[0], [1]])

        CVaR_mixture = model.conditional_value_at_risk(x_cond, alpha=0.05)
        CVaR_cdf = BaseDensityEstimator.conditional_value_at_risk(model,
                                                                  x_cond,
                                                                  alpha=0.05,
                                                                  n_samples=5 * 10**7)

        print("CVaR mixture:", CVaR_mixture)
        print("CVaR cdf:", CVaR_cdf)

        diff = np.mean(np.abs(CVaR_cdf - CVaR_mixture))
        self.assertAlmostEqual(diff, 0, places=1)
    def test_KMN_adaptive_noise(self):
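        """adaptive_noise_fn raises the noise level once n >= 1000 samples, which should widen the fitted conditional distribution."""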
        adaptive_noise_fn = lambda n, d: 0.0 if n < 1000 else 5.0

        X, Y = self.get_samples(mu=0, std=1, n_samples=999)
        est = KernelMixtureNetwork("kmn_999",
                                   1,
                                   1,
                                   n_centers=5,
                                   y_noise_std=0.0,
                                   x_noise_std=0.0,
                                   adaptive_noise_fn=adaptive_noise_fn)
        est.fit(X, Y)
        std_999 = est.std_(x_cond=np.array([[0.0]]))[0]

        X, Y = self.get_samples(mu=0, std=1, n_samples=1002)
        est = KernelMixtureNetwork("kmn_1002",
                                   1,
                                   1,
                                   n_centers=5,
                                   y_noise_std=0.0,
                                   x_noise_std=0.0,
                                   adaptive_noise_fn=adaptive_noise_fn)
        est.fit(X, Y)
        std_1002 = est.std_(x_cond=np.array([[0.0]]))[0]

        self.assertLess(std_999, std_1002)
        self.assertGreater(std_1002, 2)
def eval1():
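  """Fits a KMN on the econ1 toy dataset and plots samples and conditional densities."""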
  n_observations = 2000  # number of data points
  n_features = 1  # number of features

  X_train, X_test, y_train, y_test = build_econ1_dataset(n_observations)
  print("Size of features in training data: {}".format(X_train.shape))
  print("Size of output in training data: {}".format(y_train.shape))
  print("Size of features in test data: {}".format(X_test.shape))
  print("Size of output in test data: {}".format(y_test.shape))

  fig, ax = plt.subplots()
  fig.set_size_inches(10, 8)
  sns.regplot(X_train, y_train, fit_reg=False)
  # plt.savefig('toydata.png')
  # plt.show()
  # plot.figure.size = 100
  # plt.show()

  kmn = KernelMixtureNetwork(train_scales=True, n_centers=20)
  kmn.fit(X_train, y_train, n_epoch=300, eval_set=(X_test, y_test))
  kmn.plot_loss()
  # plt.savefig('trainplot.png')
  samples = kmn.sample(X_test)
  print(X_test.shape, samples.shape)
  jp = sns.jointplot(X_test.ravel(), samples, kind="hex", stat_func=None, size=10)
  jp.ax_joint.add_line(Line2D([X_test[0][0], X_test[0][0]], [-40, 40], linewidth=3))
  jp.ax_joint.add_line(Line2D([X_test[1][0], X_test[1][0]], [-40, 40], color='g', linewidth=3))
  jp.ax_joint.add_line(Line2D([X_test[2][0], X_test[2][0]], [-40, 40], color='r', linewidth=3))
  plt.savefig('hexplot.png')
  plt.show()
  d = kmn.predict_density(X_test[0:3, :].reshape(-1, 1), resolution=1000)
  df = pd.DataFrame(d).transpose()
  df.index = np.linspace(kmn.y_min, kmn.y_max, num=1000)
  df.plot(legend=False, linewidth=3, figsize=(12.2, 8))
  plt.savefig('conditional_density.png')
    def test7_data_normalization(self):
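        """data_normalization=True should store the empirical mean/std of X in the tf graph."""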
        X, Y = self.get_samples(std=2, mu=20)
        with tf.Session() as sess:
            model = KernelMixtureNetwork("kmn_data_normalization",
                                         1,
                                         1,
                                         n_centers=2,
                                         x_noise_std=None,
                                         y_noise_std=None,
                                         data_normalization=True,
                                         n_training_epochs=100)
            model.fit(X, Y)

            # test if data statistics were properly assigned to tf graph
            x_mean, x_std = sess.run([model.mean_x_sym, model.std_x_sym])
            print(x_mean, x_std)
            mean_diff = float(np.abs(x_mean - 20))
            std_diff = float(np.abs(x_std - 2))
            self.assertLessEqual(mean_diff, 0.5)
            self.assertLessEqual(std_diff, 0.5)
    def test9_data_normalization(self):
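        """With data normalization, the conditional mean and covariance should recover the data-generating mean and variance."""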
        np.random.seed(24)
        mean = -80
        std = 7
        data = np.random.normal([mean, mean, mean, mean], std, size=(4000, 4))
        X = data[:, 0:2]
        Y = data[:, 2:4]

        with tf.Session():
            model = KernelMixtureNetwork("kmn_data_normalization_2",
                                         2,
                                         2,
                                         n_centers=5,
                                         x_noise_std=None,
                                         y_noise_std=None,
                                         data_normalization=True,
                                         n_training_epochs=2000,
                                         random_seed=22,
                                         keep_edges=False,
                                         train_scales=True,
                                         weight_normalization=True,
                                         init_scales=np.array([1.0]))

            model.fit(X, Y)
            cond_mean = model.mean_(Y)
            print(np.mean(cond_mean))
            mean_diff = np.abs(mean - np.mean(cond_mean))
            self.assertLessEqual(mean_diff, np.abs(mean) * 0.1)

            cond_cov = np.mean(model.covariance(Y), axis=0)
            print(cond_cov)
            self.assertGreaterEqual(cond_cov[0][0], std**2 * 0.7)
            self.assertLessEqual(cond_cov[0][0], std**2 * 1.3)
            self.assertGreaterEqual(cond_cov[1][1], std**2 * 0.7)
            self.assertLessEqual(cond_cov[1][1], std**2 * 1.3)
    def test_KMN_with_2d_gaussian_2(self):
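        """Agglomerative center sampling with data normalization should recover pdf and cdf of a strongly shifted and scaled Gaussian."""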
        mu = 200
        std = 23
        X, Y = self.get_samples(mu=mu, std=std)

        for method in ["agglomerative"]:
            with tf.Session() as sess:
                model = KernelMixtureNetwork("kmn2_" + method,
                                             1,
                                             1,
                                             center_sampling_method=method,
                                             n_centers=10,
                                             hidden_sizes=(16, 16),
                                             init_scales=np.array([1.0]),
                                             train_scales=True,
                                             data_normalization=True)
                model.fit(X, Y)

                y = np.arange(mu - 3 * std, mu + 3 * std, 6 * std / 20)
                x = np.asarray([mu for i in range(y.shape[0])])
                p_est = model.pdf(x, y)
                p_true = norm.pdf(y, loc=mu, scale=std)
                self.assertLessEqual(np.mean(np.abs(p_true - p_est)), 0.1)

                p_est = model.cdf(x, y)
                p_true = norm.cdf(y, loc=mu, scale=std)
                self.assertLessEqual(np.mean(np.abs(p_true - p_est)), 0.1)
    def test2_KMN_with_2d_gaussian_noise_x(self):
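        """x-noise regularization smooths the estimate along x, so the pdfs at x=2 and x=4 should differ less than without noise."""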
        np.random.seed(22)
        X = np.random.uniform(0, 6, size=4000)
        Y = X + np.random.normal(0, 1, size=4000)

        x_test_2 = np.ones(100) * 2
        x_test_4 = np.ones(100) * 4
        y_test = np.linspace(1, 5, num=100)

        with tf.Session():
            model_no_noise = KernelMixtureNetwork("kmn_no_noise_x",
                                                  1,
                                                  1,
                                                  n_centers=5,
                                                  x_noise_std=None,
                                                  y_noise_std=None)
            model_no_noise.fit(X, Y)
            pdf_distance_no_noise = np.mean(
                np.abs(
                    model_no_noise.pdf(x_test_2, y_test) -
                    model_no_noise.pdf(x_test_4, y_test)))

            model_noise = KernelMixtureNetwork("kmn_noise_x",
                                               1,
                                               1,
                                               n_centers=5,
                                               x_noise_std=2,
                                               y_noise_std=None)
            model_noise.fit(X, Y)
            pdf_distance_noise = np.mean(
                np.abs(
                    model_noise.pdf(x_test_2, y_test) -
                    model_noise.pdf(x_test_4, y_test)))

            print("Training w/o noise - pdf distance:", pdf_distance_no_noise)
            print("Training w/ noise - pdf distance:", pdf_distance_noise)

            self.assertGreaterEqual(pdf_distance_no_noise / pdf_distance_noise,
                                    2.0)
    def testPickleUnpickleKDN(self):
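        """The model pdf should be unchanged after pickling and unpickling into a fresh tf graph."""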
        X, Y = self.get_samples()
        with tf.Session() as sess:
            model = KernelMixtureNetwork("kde",
                                         2,
                                         2,
                                         n_centers=10,
                                         n_training_epochs=10,
                                         data_normalization=True,
                                         weight_normalization=True)
            model.fit(X, Y)
            pdf_before = model.pdf(X, Y)

            # pickle and unpickle model
            dump_string = pickle.dumps(model)
        tf.reset_default_graph()
        with tf.Session() as sess:
            model_loaded = pickle.loads(dump_string)
            pdf_after = model_loaded.pdf(X, Y)

        diff = np.sum(np.abs(pdf_after - pdf_before))
        self.assertAlmostEqual(diff, 0, places=2)
    def test_KMN_with_2d_gaussian_sampling(self):
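        """Conditional samples should reproduce the model's conditional mean and standard deviation."""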
        np.random.seed(22)
        X, Y = self.get_samples(mu=5)

        import time
        t = time.time()
        model = KernelMixtureNetwork("kmn_sampling",
                                     1,
                                     1,
                                     center_sampling_method='k_means',
                                     n_centers=5,
                                     n_training_epochs=1000,
                                     data_normalization=True)
        print("time to build model:", time.time() - t)
        t = time.time()

        model.fit(X, Y)
        print("time to fit model:", time.time() - t)

        x_cond = 5 * np.ones(shape=(2000000, 1))
        _, y_sample = model.sample(x_cond)
        print(np.mean(y_sample), np.std(y_sample))
        self.assertAlmostEqual(np.mean(y_sample),
                               float(model.mean_(x_cond[1])),
                               places=1)
        self.assertAlmostEqual(np.std(y_sample),
                               float(np.sqrt(model.covariance(x_cond[1]))),
                               places=1)

        x_cond = np.ones(shape=(400000, 1))
        x_cond[0, 0] = 5.0
        _, y_sample = model.sample(x_cond)
        self.assertAlmostEqual(np.mean(y_sample),
                               float(model.mean_(x_cond[1])),
                               places=1)
        self.assertAlmostEqual(np.std(y_sample),
                               float(np.sqrt(model.covariance(x_cond[1]))),
                               places=1)
def eval_econ_data():
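  """Goodness-of-fit evaluation of a briefly trained KMN against a GaussianMixture simulator."""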
  gmm = GaussianMixture(ndim_x=1, ndim_y=1)
  econ_density = EconDensity()

  # print("ECON DATA --------------")
  # print("KMN")
  # for n_centers in [50, 100, 200]:
  #   kmn = KernelMixtureNetwork(n_centers=n_centers)
  #   gof = GoodnessOfFit(kmn, econ_density, n_observations=2000, print_fit_result=False, repeat_kolmogorov=1)
  #   gof_results = gof.compute_results()
  #   print("N_Centers:", n_centers)
  #   print(gof_results)

  print("LAZY-Learner:")
  kmn = KernelMixtureNetwork(n_training_epochs=10)
  gof = GoodnessOfFit(kmn, gmm, n_observations=100, print_fit_result=False)
  gof_results = gof.compute_results()
  print(gof_results)
  print(gof_results.report_dict())
    def test_MDN_KMN_eval_set(self):
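        """fit() should accept an eval_set for monitoring validation performance, for both MDN and KMN."""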
        mu = 200
        std = 23
        X_train, Y_train = self.get_samples(mu=mu, std=std)
        X_test, Y_test = self.get_samples(mu=mu, std=std)

        model = MixtureDensityNetwork("mdn_eval_set",
                                      1,
                                      1,
                                      n_centers=10,
                                      data_normalization=True,
                                      n_training_epochs=100)
        model.fit(X_train, Y_train, eval_set=(X_test, Y_test))

        model = KernelMixtureNetwork("kmn_eval_set",
                                     1,
                                     1,
                                     n_centers=10,
                                     data_normalization=True,
                                     n_training_epochs=100)
        model.fit(X_train, Y_train, eval_set=(X_test, Y_test))
    def test_1_KMN_with_2d_gaussian_fit_by_crossval(self):
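        """fit_by_cv should pick n_centers=10 from the parameter grid and yield a reasonable fit."""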
        X, Y = self.get_samples()

        param_grid = {
            "n_centers": [3, 10],
            "center_sampling_method": ["k_means"],
            "keep_edges": [True]
        }

        model = KernelMixtureNetwork(center_sampling_method="k_means",
                                     n_centers=20)
        model.fit_by_cv(X, Y, param_grid=param_grid)

        y = np.arange(-1, 5, 0.5)
        x = np.asarray([2 for i in range(y.shape[0])])
        p_est = model.pdf(x, y)
        p_true = norm.pdf(y, loc=2, scale=1)
        self.assertEqual(model.get_params()["n_centers"], 10)
        self.assertLessEqual(np.mean(np.abs(p_true - p_est)), 0.2)
    def test_KMN_log_pdf(self):
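        """pdf and exp(log_pdf) should coincide, with and without data normalization."""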
        X = np.random.normal(size=(1000, 3))
        Y = np.random.normal(size=(1000, 2))

        for data_norm in [True, False]:
            with tf.Session() as sess:
                model = KernelMixtureNetwork("kmn_logprob" + str(data_norm),
                                             3,
                                             2,
                                             n_centers=5,
                                             hidden_sizes=(8, 8),
                                             init_scales=np.array([0.5]),
                                             n_training_epochs=10,
                                             data_normalization=data_norm)
                model.fit(X, Y)

                x = np.random.normal(size=(1000, 3))
                y = np.random.normal(size=(1000, 2))
                prob = model.pdf(x, y)
                log_prob = model.log_pdf(x, y)
                self.assertLessEqual(np.mean(np.abs(prob - np.exp(log_prob))),
                                     0.001)
def plot_fitted_distribution():
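  """Fits a KMN on synthetic linear-Gaussian data (Y = 3*X + noise) and plots 1d, 2d, and 3d views of the conditional density."""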
  n_observations = 1000  # number of data points
  n_features = 3  # number of features

  np.random.seed(22)


  model = KernelMixtureNetwork()

  X_train = np.random.normal(loc=0, size=[n_observations, 1])
  Y_train = 3 * X_train + np.random.normal(loc=0, size=[n_observations, 1])
  X_test = np.random.normal(loc=0, size=[100, 1])
  Y_test = 3 * X_test + np.random.normal(loc=0, size=[100, 1])

  model.fit(X_train, Y_train)
  print(model.score(X_test, Y_test))
  #print(model.fit_by_cv(X_train, Y_train))



  # plt.scatter(model.X_train, model.Y_test)
  # plt.scatter(model.centr_x, model.centr_y, s=10*model.alpha)
  # plt.show()
  #
  # fig, ax = plt.subplots()
  # fig.set_size_inches(10, 8)
  # sns.regplot(X_train, Y_train, fit_reg=False)
  # plt.show()

  n_samples = 1000

  Y_plot = np.linspace(-10, 10, num=n_samples)

  X_plot = np.expand_dims(np.asarray([-1 for _ in range(n_samples)]), axis=1)
  result = model.pdf(X_plot, Y_plot)
  plt.plot(Y_plot, result)
  #plt.show()

  #2d plot
  X_plot = np.expand_dims(np.asarray([2 for _ in range(n_samples)]), axis=1)
  result = model.pdf(X_plot, Y_plot)
  plt.plot(Y_plot, result)

  plt.show()

  #3d plot
  n_samples = 100
  linspace_x = np.linspace(-15, 15, num=n_samples)
  linspace_y = np.linspace(-15, 15, num=n_samples)
  X, Y = np.meshgrid(linspace_x, linspace_y)
  X, Y = X.flatten(), Y.flatten()

  Z = model.pdf(X, Y)

  X, Y, Z = X.reshape([n_samples, n_samples]), Y.reshape([n_samples, n_samples]), Z.reshape([n_samples, n_samples])
  fig = plt.figure()
  ax = fig.gca(projection='3d')
  surf = ax.plot_surface(X, Y, Z, cmap=cm.coolwarm,
                         linewidth=0, antialiased=True)

  plt.show()
import warnings
warnings.filterwarnings("ignore")

from cde.density_simulation import SkewNormal
from cde.density_estimator import KernelMixtureNetwork
import numpy as np
""" simulate some data """
seed = 22
density_simulator = SkewNormal(random_seed=seed)
X, Y = density_simulator.simulate(n_samples=3000)
""" fit density model """
model = KernelMixtureNetwork("KDE_demo",
                             ndim_x=1,
                             ndim_y=1,
                             n_centers=50,
                             x_noise_std=0.2,
                             y_noise_std=0.1,
                             random_seed=22)
model.fit(X, Y)
""" query the conditional pdf and cdf"""
x_cond = np.zeros((1, 1))
y_query = np.ones((1, 1)) * 0.1
prob = model.pdf(x_cond, y_query)
cum_prob = model.cdf(x_cond, y_query)
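# e.g. inspect the queried values (illustrative; exact numbers depend on the fit)
print("pdf:", prob, "cdf:", cum_prob)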
""" compute conditional moments & VaR  """
x_cond = np.zeros((1, 1))

mean = model.mean_(x_cond)[0][0]
std = model.std_(x_cond)[0][0]
skewness = model.skewness(x_cond)[0]
VaR = model.value_at_risk(x_cond, alpha=0.01)[0]
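# e.g. print the computed quantities (illustrative; values depend on the random seed and fit)
print("mean: %.3f, std: %.3f, skewness: %.3f, VaR(alpha=0.01): %.3f" %
      (float(mean), float(std), float(skewness), float(VaR)))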