Exemplo n.º 1
0
    def test_sample(self):
        """Check eval_sample() / ev_sample() argument handling and outputs.

        Covers the shared invariant helpers, the error raised when ``n`` is
        missing, the shape and column names of the result, seed
        reproducibility between the function and pipe forms, and the
        ``skip`` and ``append`` flags.
        """
        # Invariant checks
        self.inv_test.md_arg(gr.eval_sample, df_arg="df_det")
        self.inv_test.df_arg(
            gr.eval_sample,
            df_arg="df_det",
            shortcut=True,
            acc_none="var_det",
        )

        # No `n` provided
        with self.assertRaises(ValueError):
            gr.eval_sample(self.md, df_det="nom")

        # A single draw gives one row holding every variable and output.
        # assertEqual (rather than assertTrue(a == b)) reports both values
        # when the check fails.
        df_min = gr.eval_sample(self.md, n=1, df_det="nom")
        self.assertEqual(df_min.shape, (1, self.md.n_var + self.md.n_out))
        self.assertEqual(set(df_min.columns), set(self.md.var + self.md.out))

        # Seed fixes runs: function form and pipe form must agree
        df_seeded = gr.eval_sample(self.md, n=10, df_det="nom", seed=101)
        df_piped = self.md >> gr.ev_sample(df_det="nom", n=10, seed=101)
        self.assertTrue(df_seeded.equals(df_piped))

        # skip=True: only the variable columns are returned
        df_skip = gr.eval_sample(self.md, n=1, df_det="nom", skip=True)
        self.assertEqual(set(df_skip.columns), set(self.md.var))

        # append=False: only the output columns are returned
        df_noappend = gr.eval_sample(self.md, n=1, df_det="nom", append=False)
        self.assertEqual(set(df_noappend.columns), set(self.md.out))
Exemplo n.º 2
0
    def test_ev_sample(self):
        """Check that the pipe form ev_sample() matches eval_sample().

        Both calls use the same seed, sample count and deterministic setting,
        so the resulting DataFrames must compare equal under gr.df_equal().
        """
        sample_kwargs = dict(n=1, seed=101, df_det="nom")

        # Function form first, then the pipe-enabled form
        df_direct = gr.eval_sample(self.model_default, **sample_kwargs)
        df_piped = self.model_default >> gr.ev_sample(**sample_kwargs)

        self.assertTrue(gr.df_equal(df_direct, df_piped))
Exemplo n.º 3
0
    def test_eval_input_subsets(self):
        """Check ev_pnd() when `signs` names only a subset of the outputs.

        The model is fit on three outputs (y1, y2, y3) while `signs` lists
        only y1 and y2. The result must have one row per test point and two
        more columns than the test data.
        """
        # Model to make dataset
        md_true = make_pareto_random(twoDim=False)

        # Create dataframe
        df_data = md_true >> gr.ev_sample(n=2000, seed=101, df_det="nom")

        # Select training set
        df_train = df_data >> gr.tf_sample(n=10)

        # Select test set: anti-join against the training rows on x1/x2,
        # then subsample
        df_test = (
            df_data
            >> gr.tf_anti_join(df_train, by=["x1", "x2"])
            >> gr.tf_sample(n=200)
        )

        # Create fitted model
        md_fit = df_train >> ft_gp(
            var=["x1", "x2", "x3"],
            out=["y1", "y2", "y3"],
        )

        # Call eval_pnd w/ only "y1" and "y2"
        df_pnd = md_fit >> gr.ev_pnd(
            df_train,
            df_test,
            signs={"y1": 1, "y2": 1},
            seed=101,
        )

        # NOTE(review): open question kept from the existing test --
        # how to imply x1 and x2 from y1 and y2?

        # Test for correctness by shape: one row per test point
        self.assertEqual(len(df_pnd), df_test.shape[0])
        # Test for correctness by # of outputs: two columns are added
        self.assertEqual(len(df_pnd.columns), len(df_test.columns) + 2)
Exemplo n.º 4
0
    def test_eval_append(self):
        """Check the `append` parameter of ev_pnd().

        With ``append=False`` the result must have one row per test point
        and exactly two columns.
        """
        # Model to make dataset
        md_true = make_pareto_random(twoDim=False)

        # Create dataframe
        df_data = md_true >> gr.ev_sample(n=2000, seed=101, df_det="nom")

        # Select training set
        df_train = df_data >> gr.tf_sample(n=10)

        # Select test set: anti-join against the training rows on x1/x2,
        # then subsample
        df_test = (
            df_data
            >> gr.tf_anti_join(df_train, by=["x1", "x2"])
            >> gr.tf_sample(n=200)
        )

        # Create fitted model
        md_fit = df_train >> ft_gp(
            var=["x1", "x2", "x3"],
            out=["y1", "y2", "y3"],
        )

        # Call eval_pnd without appending to the test data
        df_pnd = md_fit >> gr.ev_pnd(
            df_train,
            df_test,
            signs={"y1": 1, "y2": 1, "y3": 1},
            seed=101,
            append=False,
        )

        # Test for correctness by shape: one row per test point
        self.assertEqual(len(df_pnd), df_test.shape[0])
        # Test for correctness by # of outputs
        self.assertEqual(len(df_pnd.columns), 2)
Exemplo n.º 5
0
    def test_eval_faulty_inputs(self):
        """Check that ev_pnd() rejects a faulty `signs` argument.

        The model is fit with outputs y1 and y2, but `signs` refers to "y",
        which is not one of them; a ValueError is expected.
        """
        # Model to make dataset
        md_true = make_pareto_random()

        # Create dataframe
        df_data = md_true >> gr.ev_sample(n=2000, seed=101, df_det="nom")

        # Select training set
        df_train = df_data >> gr.tf_sample(n=10)

        # Select test set: anti-join against the training rows on x1/x2,
        # then subsample
        df_test = (
            df_data
            >> gr.tf_anti_join(df_train, by=["x1", "x2"])
            >> gr.tf_sample(n=200)
        )

        # Create fitted model
        md_fit = df_train >> ft_gp(
            var=["x1", "x2"],
            out=["y1", "y2"],
        )

        # Call eval_pnd; the result is irrelevant, only the exception matters
        with self.assertRaises(ValueError):
            _ = md_fit >> gr.ev_pnd(
                df_train,
                df_test,
                signs={"y": 1, "y2": 1},
                seed=101,
            )
Exemplo n.º 6
0
    def test_sample(self):
        """Check eval_sample() / ev_sample() argument handling and outputs.

        Covers the error raised when ``n`` is missing, the shape and column
        names of the result, seed reproducibility between the function and
        pipe forms, and the ``skip`` and ``append`` flags.
        """
        # No `n` provided
        with self.assertRaises(ValueError):
            gr.eval_sample(self.md, df_det="nom")

        # A single draw gives one row holding every variable and output.
        # assertEqual (rather than assertTrue(a == b)) reports both values
        # when the check fails.
        df_min = gr.eval_sample(self.md, n=1, df_det="nom")
        self.assertEqual(df_min.shape, (1, self.md.n_var + self.md.n_out))
        self.assertEqual(set(df_min.columns), set(self.md.var + self.md.out))

        # Seed fixes runs: function form and pipe form must agree
        df_seeded = gr.eval_sample(self.md, n=10, df_det="nom", seed=101)
        df_piped = self.md >> gr.ev_sample(df_det="nom", n=10, seed=101)
        self.assertTrue(df_seeded.equals(df_piped))

        # skip=True: only the variable columns are returned
        df_skip = gr.eval_sample(self.md, n=1, df_det="nom", skip=True)
        self.assertEqual(set(df_skip.columns), set(self.md.var))

        # append=False: only the output columns are returned
        df_noappend = gr.eval_sample(self.md, n=1, df_det="nom", append=False)
        self.assertEqual(set(df_noappend.columns), set(self.md.out))
Exemplo n.º 7
0
    def test_tran_reweight(self):
        """Check tran_reweight() / tf_reweight() estimates and validation.

        Reweights a base sample toward a model with a modified marginal for
        ``x``, then checks that zero lies within two standard errors of the
        weighted mean of ``y`` and that the weighted standard error is
        smaller than the unweighted one. Also checks that invalid inputs
        raise ValueError.
        """
        ## Correctness
        # Choose scale based on Owen (2013) Exercise 9.7
        md_new = self.md >> gr.cp_marginals(
            x=dict(dist="norm", loc=0, scale=sqrt(4 / 5)))

        df_base = self.md >> gr.ev_sample(n=500, df_det="nom", seed=101)

        df = (
            df_base
            >> gr.tf_reweight(md_base=self.md, md_new=md_new)
            >> gr.tf_summarize(
                mu=gr.mean(DF.y * DF.weight),
                se=gr.sd(DF.y * DF.weight) / gr.sqrt(gr.n(DF.weight)),
                se_orig=gr.sd(DF.y) / gr.sqrt(gr.n(DF.weight)),
            )
        )
        mu = df.mu[0]
        se = df.se[0]
        se_orig = df.se_orig[0]

        # Zero must lie strictly inside the two-standard-error interval.
        # Separate assertLess calls report the offending values on failure.
        self.assertLess(mu - se * 2, 0)
        self.assertLess(0, mu + se * 2)

        ## Optimized IS should be more precise than ordinary monte carlo
        self.assertLess(se, se_orig)

        ## Invariants
        # Missing input in data
        with self.assertRaises(ValueError):
            gr.tran_reweight(df_base[["y"]], md_base=self.md, md_new=self.md)
        # Input mismatch
        with self.assertRaises(ValueError):
            gr.tran_reweight(df_base, md_base=self.md, md_new=gr.Model())
        # Weights collision: the data already carries a `weight` column
        with self.assertRaises(ValueError):
            gr.tran_reweight(
                df_base >> gr.tf_mutate(weight=0),
                md_base=self.md,
                md_new=self.md,
            )
Exemplo n.º 8
0
    def test_nls(self):
        """Check fit_nls() / ft_nls() on synthetic data.

        Sections, in order:
          * fit a two-parameter exponential model to noisy data,
          * an unidentifiable model must emit a RuntimeWarning,
          * the true parameters lie between the alpha/2 and 1 - alpha/2
            quantiles of the fitted marginals,
          * a model with one parameter pinned by equal bounds can be fit
            and sampled,
          * a trajectory model can be fit with method="SLSQP" and evaluated,
          * the `out` argument selects which outputs are fit.
        """
        ## Ground-truth model
        c_true = 2
        a_true = 1

        md_true = (
            gr.Model()
            >> gr.cp_function(
                fun=lambda x: a_true * np.exp(x[0] * c_true) + x[1],
                var=["x", "epsilon"],
                out=["y"],
            )
            >> gr.cp_marginals(epsilon=dict(dist="norm", loc=0, scale=0.5))
            >> gr.cp_copula_independence()
        )
        df_data = md_true >> gr.ev_sample(
            n=5, seed=101, df_det=gr.df_make(x=[0, 1, 2, 3, 4]))

        ## Model to fit
        md_param = (
            gr.Model()
            >> gr.cp_function(
                fun=lambda x: x[2] * np.exp(x[0] * x[1]),
                var=["x", "c", "a"],
                out=["y"],
            )
            >> gr.cp_bounds(c=[0, 4], a=[0.1, 2.0])
        )

        ## Fit the model
        md_fit = df_data >> gr.ft_nls(
            md=md_param,
            verbose=False,
            uq_method="linpool",
        )

        ## Unidentifiable model throws warning
        # -------------------------
        # `a` and `z` only enter the function through the ratio a / z
        md_unidet = (
            gr.Model()
            >> gr.cp_function(
                fun=lambda x: x[2] / x[3] * np.exp(x[0] * x[1]),
                var=["x", "c", "a", "z"],
                out=["y"],
            )
            >> gr.cp_bounds(c=[0, 4], a=[0.1, 2.0], z=[0, 1])
        )
        with self.assertWarns(RuntimeWarning):
            gr.fit_nls(
                df_data,
                md=md_unidet,
                uq_method="linpool",
            )

        ## True parameters in wide confidence region
        # -------------------------
        alpha = 1e-3
        for name, value_true in (("c", c_true), ("a", a_true)):
            marginal = md_fit.density.marginals[name]
            self.assertLessEqual(marginal.q(alpha / 2), value_true)
            self.assertLessEqual(value_true, marginal.q(1 - alpha / 2))

        ## Model with fixed parameter
        # -------------------------
        # Equal lower and upper bounds pin `a` at 1
        md_fixed = (
            gr.Model()
            >> gr.cp_function(
                fun=lambda x: x[2] * np.exp(x[0] * x[1]),
                var=["x", "c", "a"],
                out=["y"],
            )
            >> gr.cp_bounds(c=[0, 4], a=[1, 1])
        )
        md_fit_fixed = df_data >> gr.ft_nls(
            md=md_fixed, verbose=False, uq_method="linpool")

        # Test that fixed model can evaluate successfully
        gr.eval_sample(md_fit_fixed, n=1, df_det="nom")

        ## Trajectory model
        # -------------------------
        md_base = models.make_trajectory_linear()
        md_fit_traj = data.df_trajectory_windowed >> gr.ft_nls(
            md=md_base, method="SLSQP", tol=1e-3)
        # Smoke check only: the evaluation must run; its result is unused
        _ = md_fit_traj >> gr.ev_nominal(df_det="nom")

        ## Select output for fitting
        # -------------------------
        # Split model has inconsistent "true" parameter value: the data below
        # has f = x and g = x, while the model gives f = c * x and
        # g = 2 * c * x, so no single `c` reproduces both outputs.
        md_split = (
            gr.Model("Split")
            >> gr.cp_vec_function(
                fun=lambda df: gr.df_make(
                    f=1 * df.c * df.x,
                    g=2 * df.c * df.x,
                ),
                var=["c", "x"],
                out=["f", "g"],
            )
            >> gr.cp_bounds(
                x=(-1, +1),
                c=(-1, +1),
            )
        )

        df_split = (
            gr.df_make(x=gr.linspace(-1, +1, 100))
            >> gr.tf_mutate(f=X.x, g=X.x)
        )

        def fit_and_score(out):
            """Fit md_split on the outputs in `out`; return the MSE summary."""
            return (
                df_split
                >> gr.ft_nls(md_split, out=out)
                >> gr.ev_df(df_split >> gr.tf_rename(f_t=X.f, g_t=X.g))
                >> gr.tf_summarize(
                    mse_f=gr.mse(X.f, X.f_t),
                    mse_g=gr.mse(X.g, X.g_t),
                )
            )

        # Fitting both outputs: cannot achieve mse ~= 0
        df_both = fit_and_score(["f", "g"])
        self.assertGreater(df_both.mse_f[0], 0)
        self.assertGreater(df_both.mse_g[0], 0)

        # Fitting "f" only
        df_f = fit_and_score(["f"])
        self.assertLess(df_f.mse_f[0], 1e-16)
        self.assertGreater(df_f.mse_g[0], 0)

        # Fitting "g" only
        df_g = fit_and_score(["g"])
        self.assertGreater(df_g.mse_f[0], 0)
        self.assertLess(df_g.mse_g[0], 1e-16)
Exemplo n.º 9
0
    def test_nls(self):
        """Check fit_nls() / ft_nls() on synthetic data.

        Sections visible here, in order: a fit of a two-parameter exponential
        model to noisy data, a RuntimeWarning for an unidentifiable model,
        true parameters lying between the alpha/2 and 1 - alpha/2 quantiles
        of the fitted marginals, a fit with one parameter pinned by equal
        bounds, and a trajectory-model fit evaluated at nominal conditions.

        NOTE(review): this definition reaches the end of the visible excerpt;
        further sections may follow.
        """
        ## Ground-truth model
        c_true = 2
        a_true = 1

        # y = a_true * exp(c_true * x) + epsilon, with epsilon ~ norm(0, 0.5)
        md_true = (gr.Model() >> gr.cp_function(
            fun=lambda x: a_true * np.exp(x[0] * c_true) + x[1],
            var=["x", "epsilon"],
            out=["y"],
        ) >> gr.cp_marginals(epsilon={
            "dist": "norm",
            "loc": 0,
            "scale": 0.5
        }) >> gr.cp_copula_independence())
        df_data = md_true >> gr.ev_sample(
            n=5, seed=101, df_det=gr.df_make(x=[0, 1, 2, 3, 4]))

        ## Model to fit
        # Same functional form, with `c` and `a` as bounded variables
        md_param = (gr.Model() >> gr.cp_function(
            fun=lambda x: x[2] * np.exp(x[0] * x[1]),
            var=["x", "c", "a"],
            out=["y"]) >> gr.cp_bounds(c=[0, 4], a=[0.1, 2.0]))

        ## Fit the model
        md_fit = df_data >> gr.ft_nls(
            md=md_param,
            verbose=False,
            uq_method="linpool",
        )

        ## Unidentifiable model throws warning
        # -------------------------
        # `a` and `z` only enter the function through the ratio a / z
        md_unidet = (gr.Model() >> gr.cp_function(
            fun=lambda x: x[2] / x[3] * np.exp(x[0] * x[1]),
            var=["x", "c", "a", "z"],
            out=["y"],
        ) >> gr.cp_bounds(c=[0, 4], a=[0.1, 2.0], z=[0, 1]))
        with self.assertWarns(RuntimeWarning):
            gr.fit_nls(
                df_data,
                md=md_unidet,
                uq_method="linpool",
            )

        ## True parameters in wide confidence region
        # -------------------------
        alpha = 1e-3
        self.assertTrue(
            (md_fit.density.marginals["c"].q(alpha / 2) <= c_true)
            and (c_true <= md_fit.density.marginals["c"].q(1 - alpha / 2)))

        self.assertTrue(
            (md_fit.density.marginals["a"].q(alpha / 2) <= a_true)
            and (a_true <= md_fit.density.marginals["a"].q(1 - alpha / 2)))

        ## Model with fixed parameter
        # -------------------------
        # Equal lower and upper bounds pin `a` at 1
        md_fixed = (gr.Model() >> gr.cp_function(
            fun=lambda x: x[2] * np.exp(x[0] * x[1]),
            var=["x", "c", "a"],
            out=["y"]) >> gr.cp_bounds(c=[0, 4], a=[1, 1]))
        md_fit_fixed = df_data >> gr.ft_nls(
            md=md_fixed, verbose=False, uq_method="linpool")

        # Test that fixed model can evaluate successfully
        gr.eval_sample(md_fit_fixed, n=1, df_det="nom")

        ## Trajectory model
        # -------------------------
        md_base = models.make_trajectory_linear()
        # NOTE: `md_fit` is rebound here; the earlier fit is no longer used
        md_fit = data.df_trajectory_windowed >> gr.ft_nls(
            md=md_base, method="SLSQP", tol=1e-3)
        # Result is not inspected in the visible code; this only checks that
        # evaluation runs
        df_tmp = md_fit >> gr.ev_nominal(df_det="nom")