Example #1
    def get_jacs(
            self,
            input_data: InputDataGLM
    ):
        # Select the estimator implementation matching the configured noise model.
        if self.noise_model is None:
            raise ValueError("noise_model is None")
        else:
            if self.noise_model == "nb":
                from batchglm.api.models.tf1.glm_nb import Estimator
            elif self.noise_model == "norm":
                from batchglm.api.models import Estimator
            elif self.noise_model == "beta":
                from batchglm.api.models.tf1.glm_beta import Estimator
            else:
                raise ValueError("noise_model not recognized")

        provide_optimizers = {"gd": True, "adam": True, "adagrad": True, "rmsprop": True,
                              "nr": False, "nr_tr": False,
                              "irls": False, "irls_gd": False, "irls_tr": False, "irls_gd_tr": False}

        estimator = Estimator(
            input_data=input_data,
            quick_scale=False,
            provide_optimizers=provide_optimizers,
            provide_fim=False,
            provide_hessian=False,
            init_a="standard",
            init_b="standard"
        )
        estimator.initialize()
        # Do not train, evaluate at initialization!
        estimator.train_sequence(training_strategy=[
            {
                "convergence_criteria": "step",
                "stopping_criteria": 0,
                "use_batching": False,
                "optim_algo": "gd",
                "train_mu": False,
                "train_r": False
            },
        ])
        estimator_store = estimator.finalize()
        return estimator_store.gradients.values
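
A minimal usage sketch (not part of the example above), assuming the method sits on a test class whose `self.sim` attribute is a batchglm simulator exposing `input_data`, as in Example #2 below, and that the remaining `InputDataGLM` constructor arguments are optional; the method name and the finiteness check are illustrative.

    def _check_jacs_at_init(self):
        # Hypothetical helper: build the input container from the assumed simulator
        # and inspect the gradients evaluated at initialization.
        import numpy as np
        from batchglm.api.models.tf1.glm_nb import InputDataGLM

        input_data = InputDataGLM(
            data=self.sim.input_data.x,
            design_loc=self.sim.input_data.design_loc,
            design_scale=self.sim.input_data.design_scale,
            size_factors=self.sim.input_data.size_factors,
            as_dask=False
        )
        jacs = self.get_jacs(input_data=input_data)
        # get_jacs trains for zero steps, so these are the gradients at the initial parameters.
        assert np.all(np.isfinite(jacs))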
Example #2
import logging

import numpy as np
import scipy.sparse


class _TestAccuracyGlmAllEstim:
    def __init__(self, simulator, quick_scale, noise_model, sparse, init_mode):
        # Import the estimator and input-data classes matching the requested noise model.
        if noise_model is None:
            raise ValueError("noise_model is None")
        else:
            if noise_model == "nb":
                from batchglm.api.models.tf1.glm_nb import Estimator, InputDataGLM
            elif noise_model == "norm":
                from batchglm.api.models import Estimator, InputDataGLM
            elif noise_model == "beta":
                from batchglm.api.models.tf1.glm_beta import Estimator, InputDataGLM
            else:
                raise ValueError("noise_model not recognized")

        batch_size = 2000
        # IRLS-based optimizers are only available for the negative binomial ("nb")
        # and normal ("norm") noise models.
        provide_optimizers = {
            "gd": True,
            "adam": True,
            "adagrad": True,
            "rmsprop": True,
            "nr": True,
            "nr_tr": True,
            "irls": noise_model in ["nb", "norm"],
            "irls_gd": noise_model in ["nb", "norm"],
            "irls_tr": noise_model in ["nb", "norm"],
            "irls_gd_tr": noise_model in ["nb", "norm"]
        }

        # Optionally convert the simulated counts to a sparse matrix; all other
        # input slots are taken over from the simulator unchanged.
        x = scipy.sparse.csr_matrix(simulator.input_data.x) if sparse else simulator.input_data.x
        input_data = InputDataGLM(
            data=x,
            design_loc=simulator.input_data.design_loc,
            design_scale=simulator.input_data.design_scale,
            design_loc_names=simulator.input_data.design_loc_names,
            design_scale_names=simulator.input_data.design_scale_names,
            constraints_loc=simulator.input_data.constraints_loc,
            constraints_scale=simulator.input_data.constraints_scale,
            size_factors=simulator.input_data.size_factors,
            as_dask=False)

        self.estimator = Estimator(input_data=input_data,
                                   batch_size=batch_size,
                                   quick_scale=quick_scale,
                                   provide_optimizers=provide_optimizers,
                                   provide_batched=True,
                                   provide_fim=noise_model in ["nb", "norm"],
                                   provide_hessian=True,
                                   init_a=init_mode,
                                   init_b=init_mode)
        self.sim = simulator

    def estimate(self, algo, batched, acc, lr):
        # Run a single training phase with the given optimizer until all features
        # have converged to the requested accuracy.
        self.estimator.initialize()
        self.estimator.train_sequence(training_strategy=[
            {
                "learning_rate": lr,
                "convergence_criteria": "all_converged",
                "stopping_criteria": acc,
                "use_batching": batched,
                "optim_algo": algo,
            },
        ])

    def eval_estimation(self, batched, train_loc, train_scale):
        # Looser accuracy thresholds are used when training was done on mini-batches.
        if batched:
            threshold_dev_a = 0.4
            threshold_dev_b = 0.4
            threshold_std_a = 2
            threshold_std_b = 2
        else:
            threshold_dev_a = 0.2
            threshold_dev_b = 0.2
            threshold_std_a = 1
            threshold_std_b = 1

        success = True
        if train_loc:
            # Relative deviation of the fitted location parameters from the simulated ground truth.
            rel_dev_a = (self.estimator.model.a_var - self.sim.a_var) / self.sim.a_var
            mean_rel_dev_a = np.mean(rel_dev_a)
            std_rel_dev_a = np.std(rel_dev_a)

            logging.getLogger("batchglm").info("mean_rel_dev_a %f" % mean_rel_dev_a)
            logging.getLogger("batchglm").info("std_rel_dev_a %f" % std_rel_dev_a)

            if np.abs(mean_rel_dev_a) > threshold_dev_a or std_rel_dev_a > threshold_std_a:
                success = False
        if train_scale:
            # Relative deviation of the fitted scale parameters from the simulated ground truth.
            rel_dev_b = (self.estimator.model.b_var - self.sim.b_var) / self.sim.b_var
            mean_rel_dev_b = np.mean(rel_dev_b)
            std_rel_dev_b = np.std(rel_dev_b)

            logging.getLogger("batchglm").info("mean_rel_dev_b %f" % mean_rel_dev_b)
            logging.getLogger("batchglm").info("std_rel_dev_b %f" % std_rel_dev_b)

            if np.abs(mean_rel_dev_b) > threshold_dev_b or std_rel_dev_b > threshold_std_b:
                success = False

        return success
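
A sketch of how this harness might be exercised, not shown in the example: the `Simulator` import path and its `generate_sample_description()` / `generate()` workflow are assumptions, and the optimizer, accuracy and learning-rate values are illustrative; only the constructor and method signatures are taken from the class above.

if __name__ == "__main__":
    from batchglm.api.models.tf1.glm_nb import Simulator  # assumed to sit next to Estimator

    # Simulate a small negative-binomial data set (hypothetical simulator API).
    sim = Simulator(num_observations=10000, num_features=10)
    sim.generate_sample_description(num_batches=0, num_conditions=2)
    sim.generate()

    test = _TestAccuracyGlmAllEstim(
        simulator=sim,
        quick_scale=False,
        noise_model="nb",
        sparse=False,
        init_mode="standard"
    )
    # Fit with IRLS trust-region until all features converge, then compare the
    # fitted location/scale parameters against the simulated ground truth.
    test.estimate(algo="irls_tr", batched=False, acc=1e-6, lr=1)
    assert test.eval_estimation(batched=False, train_loc=True, train_scale=True)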