Exemplo n.º 1
0
    def simulate_from_normal(self,
                             variable,
                             n=1,
                             bias=0,
                             sd=1,
                             transformation_pair=(None, None)):
        """Simulate n values from N(v + bias, sd^2) for each value of
        self.values_from_mr for the given variable.

        v is the matrix self.values_from_mr[variable] transformed by the
        first element of transformation_pair; the second element is applied
        to the simulated draws before they are returned.  The resulting
        array has as many rows as self.values_from_mr[variable] and n times
        as many columns.  Returns None if n < 1.
        """
        if n < 1:
            return None

        values = try_transformation(self.values_from_mr[variable],
                                    transformation_pair[0])
        shape = self.values_from_mr[variable].shape
        # Draw n independent samples per cell and stack them column-wise in
        # a single concatenate call (repeated pairwise concatenation would
        # be quadratic in n).
        draws = [normal(values + bias, sd, size=shape) for _ in range(n)]
        result = draws[0] if n == 1 else concatenate(draws, axis=1)
        # Apply the inverse transformation exactly once.
        return try_transformation(result, transformation_pair[1])
Exemplo n.º 2
0
 def _run_stochastic_test_poisson(
     self, function, expected_results, number_of_iterations, significance_level=0.01, transformation=None
 ):
     """
     Run the given function for the specified number_of_iterations.
     Uses Bayesian statistics to determine whether the produced results are
     within the specified significance_level of the expected_results.

     Performs a likelihood-ratio test of the per-cell run means against
     expected_results (the formula matches a Poisson LRT).  Returns a tuple
     (passed, message): passed is True when the chi-square p-value of the
     test statistic is >= significance_level.
     """
     K = expected_results.size
     # Per-cell sums of the (optionally transformed) outcomes over all runs.
     sum_y = zeros(K, dtype=float32)
     # x_kr holds one row of transformed outcomes per iteration.
     x_kr = zeros((number_of_iterations, K), dtype=float32)
     for i in range(number_of_iterations):
         y_r = function()
         x_kr[i, :] = try_transformation(y_r, transformation)
         sum_y = sum_y + x_kr[i, :]
     # Per-cell rate estimate under the alternative hypothesis.
     lambdak = sum_y / float(number_of_iterations)
     # Rate under the null hypothesis: the expected results, transformed alike.
     lambdanull = try_transformation(expected_results.astype(float32), transformation)
     sumxk = sum(x_kr, axis=0)
     # Likelihood-ratio test statistic; cells with lambdanull == 0 are masked
     # so the division inside the log does not blow up.
     LRTS = 2.0 * (
         (number_of_iterations * (lambdanull - lambdak).sum())
         + (ln(lambdak / ma.masked_where(lambdanull == 0, lambdanull)) * sumxk).sum()
     )
     # Asymptotically LRTS ~ chi-square with K degrees of freedom.
     prob = chisqprob(LRTS, K)
     logger.log_status("Stochastic Test Poisson: LRTS=" + str(LRTS) + ", df=", str(K), ", p=" + str(prob))
     return (prob >= significance_level, "prob=%f < significance level of %f" % (prob, significance_level))
 def compute_stochastic_test_normal(self,
                                    function,
                                    expected_results,
                                    number_of_iterations,
                                    significance_level=0.01,
                                    transformation="sqrt"):
     """Run 'function' number_of_iterations times and compute a
     likelihood-ratio test statistic of the (transformed) outcomes against
     the (equally transformed) expected_results.

     Returns the tuple (K, LRTS, prob): K is the number of cells, LRTS the
     test statistic and prob its chi-square p-value with K degrees of
     freedom.  Also stores per-cell variances of the outcomes in
     self.variance.  Note: significance_level is accepted but never used
     in this method; callers compare prob themselves.
     """
     K = expected_results.size
     sum_y = zeros(K, dtype=float32)
     # x_kr holds one row of transformed outcomes per iteration.
     x_kr = zeros((number_of_iterations, K), dtype=float32)
     texpected_results = try_transformation(expected_results,
                                            transformation)
     for i in range(number_of_iterations):
         y_r = function()
         x_kr[i, :] = try_transformation(y_r, transformation)
         sum_y = sum_y + x_kr[i, :]
     # Sample mean per cell (estimate under the alternative hypothesis).
     muest = sum_y / float(number_of_iterations)
     # Pooled variance estimates under the alternative (sigma_1) and the
     # null (sigma_0) hypotheses.
     sigma_1 = ((x_kr - muest)**2.0).sum() / float(number_of_iterations * K)
     # NOTE: 'number_of_iterations * range(1, K + 1)' is Python 2 list
     # repetition (range returns a list there); it labels each column 1..K
     # in every row.  This expression would fail on Python 3.
     self.variance = variance(
         x_kr,
         labels=reshape(array(number_of_iterations * range(1, K + 1)),
                        (number_of_iterations, K)),
         index=arange(K) + 1)
     sigma_0 = ((x_kr - texpected_results)**2.0).sum() / float(
         number_of_iterations * K)
     LRTS = number_of_iterations * K * log(sigma_0 / sigma_1)
     prob = chisqprob(LRTS, K)
     return (K, LRTS, prob)
Exemplo n.º 4
0
 def estimate_mu(self):
     """Fill self.mu and self.values_from_mr with the (transformed) variable
     values computed from each of the self.number_of_runs runs, for every
     quantity in self.observed_data.

     self.mu[iout] becomes an (observations x runs) float32 matrix for the
     iout-th quantity.  Runs whose dataset is larger than the first run's
     are restricted to the first run's ids; when that happens, the observed
     values self.y[iout] are subset to the same ids at the end.
     """
     iout = -1
     self.values_from_mr = {}
     for quantity in self.observed_data.get_quantity_objects():
         dataset_name = quantity.get_dataset_name()
         variable = quantity.get_variable_name()
         iout += 1
         dimension_reduced = False
         quantity_ids = quantity.get_dataset().get_id_attribute()
         for i in range(self.number_of_runs):
             ds = self._compute_variable_for_one_run(i, variable, dataset_name, self.get_calibration_year(), quantity)
             if isinstance(ds, InteractionDataset):
                 # Flatten interaction datasets so ids/attributes are 1-D.
                 ds = ds.get_flatten_dataset()
             if i == 0: # first run
                 # Allocate the result matrix once the observation count is known.
                 self.mu[iout] = zeros((self.y[iout].size, self.number_of_runs), dtype=float32)
                 ids = ds.get_id_attribute()
             else:
                 if ds.size() > ids.shape[0]:
                     # Restrict this run to the ids seen in the first run.
                     ds = DatasetSubset(ds, ds.get_id_index(ids))
                     dimension_reduced = True
             scale = self.get_scales(ds, i+1, variable)
             matching_index = ds.get_id_index(quantity_ids)
             values = scale[matching_index] * ds.get_attribute(variable)[matching_index]
             self.mu[iout][:,i] = try_transformation(values, quantity.get_transformation())
             
         self.values_from_mr[variable.get_expression()] = self.mu[iout]
         if dimension_reduced:
             # Keep the observed values aligned with the reduced id set.
             self.y[iout] = self.y[iout][quantity.get_dataset().get_id_index(ids)]
Exemplo n.º 5
0
 def _get_m_from_values(self, values, ids):
     """Store the given values and ids as the prediction matrix self.m and
     its ids self.m_ids.  A 1-D values vector is reshaped into a single
     column; the forward transformation of
     self.transformation_pair_for_prediction is applied to the matrix."""
     self.m_ids = ids
     matrix = values
     if matrix.ndim < 2:
         # Promote a flat vector to an (size, 1) column matrix.
         matrix = resize(matrix, (matrix.size, 1))
     self.m = try_transformation(
         matrix, self.transformation_pair_for_prediction[0])
Exemplo n.º 6
0
    def generate_posterior_distribution(
            self,
            year,
            quantity_of_interest,
            cache_directory=None,
            values=None,
            ids=None,
            procedure="opus_core.bm_normal_posterior",
            use_bias_and_variance_from=None,
            transformed_back=True,
            transformation_pair=(None, None),
            **kwargs):
        """Generate the posterior distribution of quantity_of_interest for
        the given year.

        Either cache_directory or both values and ids must be supplied;
        cache_directory takes precedence.  The posterior is produced by the
        model component named by 'procedure'; extra keyword arguments are
        passed through to its run() method.  When transformed_back is True
        and a forward transformation is set for the prediction, the inverse
        transformation is applied to the simulated values before returning.
        """
        if cache_directory is not None:
            self.cache_set = array([cache_directory])
        elif values is None or ids is None:
            # Call-form raise (the old comma-form statement is Python-2-only
            # syntax).
            raise StandardError(
                "Either cache_directory or values and ids must be given.")

        self.set_posterior(
            year,
            quantity_of_interest,
            values=values,
            ids=ids,
            use_bias_and_variance_from=use_bias_and_variance_from,
            transformation_pair=transformation_pair)
        procedure_class = ModelComponentCreator().get_model_component(
            procedure)
        self.simulated_values = procedure_class.run(self, **kwargs)
        if transformed_back and (self.transformation_pair_for_prediction[0]
                                 is not None):  # need to transform back
            self.simulated_values = try_transformation(
                self.simulated_values,
                self.transformation_pair_for_prediction[1])
        return self.simulated_values
Exemplo n.º 7
0
    def generate_posterior_distribution(
            self,
            year,
            quantity_of_interest,
            procedure="opus_core.bm_normal_posterior",
            use_bias_and_variance_from=None,
            transformed_back=True,
            aggregate_to=None,
            intermediates=None,
            **kwargs):
        """
        'quantity_of_interest' is a variable name about which we want to get the posterior distribution.
        If there is multiple known_output, it must be made clear from which one the bias and variance
        is to be used (argument use_bias_and_variance_from) If it is None, the first known output is used.

        Extra keyword arguments are passed to the run() method of the model
        component named by 'procedure'.  When transformed_back is True and a
        forward transformation is set for the prediction, the inverse
        transformation is applied to the simulated values.  When
        aggregate_to is given, the result is aggregated to that dataset,
        optionally through 'intermediates' (a list of intermediate dataset
        names; defaults to an empty list).
        """
        # None sentinel instead of a mutable [] default, which would be
        # shared across calls.
        if intermediates is None:
            intermediates = []
        self.set_posterior(year, quantity_of_interest,
                           use_bias_and_variance_from)
        procedure_class = ModelComponentCreator().get_model_component(
            procedure)
        self.simulated_values = procedure_class.run(self, **kwargs)
        if transformed_back and (self.transformation_pair_for_prediction[0]
                                 is not None):  # need to transform back
            self.simulated_values = try_transformation(
                self.simulated_values,
                self.transformation_pair_for_prediction[1])
        if aggregate_to is not None:
            self.simulated_values = self.aggregate(
                self.simulated_values,
                aggregate_from=VariableName(
                    quantity_of_interest).get_dataset_name(),
                aggregate_to=aggregate_to,
                intermediates=intermediates)

        return self.simulated_values
Exemplo n.º 8
0
 def get_data_for_quantity(self, transformed_back=True):
     """Return the observed data self.y for the quantity selected by
     self.use_bias_and_variance_index.  When transformed_back is True and
     the quantity has a forward transformation, the quantity's inverse
     transformation is applied first."""
     idx = self.use_bias_and_variance_index
     quantity = self.observed_data.get_quantity_object_by_index(idx)
     transformation, inverse = quantity.get_transformation_pair()
     if not transformed_back or transformation is None:
         return self.y[idx]
     return try_transformation(self.y[idx], inverse)
Exemplo n.º 9
0
 def compute_m(self, year, quantity_of_interest):
     """Compute the prediction matrix self.m for the given variable and
     year, one column per run, applying the forward prediction
     transformation; the matching ids are stored in self.m_ids."""
     variable_name = VariableName(quantity_of_interest)
     dataset_name = variable_name.get_dataset_name()
     for run in range(self.number_of_runs):
         ds = self._compute_variable_for_one_run(
             run, variable_name, dataset_name, year,
             self.observed_data.get_quantity_object(quantity_of_interest))
         if run == 0:
             # Allocate on the first run, once the dataset size is known.
             self.m = zeros((ds.size(), self.number_of_runs), dtype=float32)
             self.m_ids = ds.get_id_attribute()
         self.m[:, run] = try_transformation(
             ds.get_attribute(variable_name),
             self.transformation_pair_for_prediction[0])
 def compute_stochastic_test_normal(self, function, expected_results,
                         number_of_iterations, significance_level=0.01, transformation="sqrt"):
     """Run 'function' number_of_iterations times and compute a
     likelihood-ratio test statistic of the (transformed) outcomes against
     the (equally transformed) expected_results.

     Returns the tuple (K, LRTS, prob): K is the number of cells, LRTS the
     test statistic and prob its chi-square p-value with K degrees of
     freedom.  Also stores per-cell variances in self.variance.  Note:
     significance_level is accepted but never used in this method.
     """
     K = expected_results.size
     sum_y = zeros(K, dtype=float32)
     # x_kr holds one row of transformed outcomes per iteration.
     x_kr = zeros((number_of_iterations, K), dtype=float32)
     texpected_results = try_transformation(expected_results, transformation)
     for i in range(number_of_iterations):
         y_r = function()
         x_kr[i,:] = try_transformation(y_r, transformation)
         sum_y = sum_y + x_kr[i,:]
     # Sample mean per cell (estimate under the alternative hypothesis).
     muest = sum_y/float(number_of_iterations)
     # Pooled variance estimates under the alternative (sigma_1) and the
     # null (sigma_0) hypotheses.
     sigma_1 = ((x_kr - muest)**2.0).sum()/float(number_of_iterations*K)
     # NOTE: 'number_of_iterations*range(1,K+1)' is Python 2 list repetition
     # (range returns a list there); it labels each column 1..K in every
     # row.  This expression would fail on Python 3.
     self.variance = variance(x_kr, labels=reshape(array(number_of_iterations*range(1,K+1)),
                                                 (number_of_iterations,K)),
                              index=arange(K)+1)
     sigma_0 = ((x_kr - texpected_results)**2.0).sum()/float(number_of_iterations*K)
     LRTS = number_of_iterations*K * log(sigma_0/sigma_1)
     prob = chisqprob(LRTS, K)
     return (K, LRTS, prob)
Exemplo n.º 11
0
 def get_exact_quantile(self, alpha, transformed_back=True, **kwargs):
     """Return the alpha-quantile of each posterior component, computed via
     bmaquant from the component means, variances and weights.  When
     transformed_back is True and a forward transformation is set for the
     prediction, the inverse transformation is applied to the result."""
     component_variances = self.get_posterior_component_variance()
     means = self.get_posterior_component_mean()
     weights = self.get_weights()
     sig = sqrt(component_variances)
     res = zeros(means.size)
     for idx in range(means.size):
         res[idx] = bmaquant(alpha, weights, means[idx], sig, **kwargs)
     if transformed_back and (
             self.transformation_pair_for_prediction[0] is not None):
         res = try_transformation(
             res, self.transformation_pair_for_prediction[1])
     return res
Exemplo n.º 12
0
    def simulate_from_normal(self, variable, n=1, bias=0, sd=1, transformation_pair=(None, None)):
        """Simulates n values from the normal distribution for each value of self.values_from_mr
        for the given variable:
        N(v+bias, sd^2), where v is (possibly) transformed matrix self.values_from_mr using the first element
        of transformation_pair. The second element is applied to the results.
        The resulting array has as many rows as self.values_from_mr. Number of columns is
        equal to the number of columns of self.values_from_mr * n.
        Returns None if n < 1.
        """
        if n < 1:
            return None

        values = try_transformation(self.values_from_mr[variable], transformation_pair[0])
        shape = self.values_from_mr[variable].shape
        # Draw n independent samples per cell and stack them column-wise in
        # one concatenate call (repeated pairwise concatenation is quadratic).
        draws = [normal(values + bias, sd, size=shape) for _ in range(n)]
        result = draws[0] if n == 1 else concatenate(draws, axis=1)
        # Apply the inverse transformation exactly once.
        return try_transformation(result, transformation_pair[1])
Exemplo n.º 13
0
 def compute_m(self, year, quantity_of_interest, values=None, ids=None):
     """Compute the single-column prediction matrix self.m for the given
     year and variable, averaged over all runs in self.cache_set, applying
     the forward prediction transformation; self.m_ids receives the ids.
     When both values and ids are given, they are used directly via
     _get_m_from_values and no runs are computed.
     """
     if (values is not None) and (ids is not None):
         self._get_m_from_values(values, ids)
         return
     variable_name = VariableName(quantity_of_interest)
     dataset_name = variable_name.get_dataset_name()
     for i in range(self.cache_set.size):
         ds = self._compute_variable_for_one_run(i, variable_name, dataset_name, year)
         if i == 0: # first run
             # Allocate once the dataset size is known.  NOTE(review): if
             # self.cache_set is empty, 'm' is never defined and the final
             # line raises NameError — assumes at least one cached run.
             m = zeros((ds.size(), self.cache_set.size), dtype=float32)
             self.m_ids = ds.get_id_attribute()
         m[:, i] = try_transformation(ds.get_attribute(variable_name), self.transformation_pair_for_prediction[0])
     self.m = resize(average(m, axis=1), (m.shape[0], 1))
Exemplo n.º 14
0
 def _run_stochastic_test_poisson(self,
                                  function,
                                  expected_results,
                                  number_of_iterations,
                                  significance_level=0.01,
                                  transformation=None):
     """
     Run the given function for the specified number_of_iterations.
     Uses Bayesian statistics to determine whether the produced results are
     within the specified significance_level of the expected_results.

     Performs a likelihood-ratio test of the per-cell run means against
     expected_results (the formula matches a Poisson LRT).  Returns a tuple
     (passed, message): passed is True when the chi-square p-value of the
     test statistic is >= significance_level.
     """
     K = expected_results.size
     # Per-cell sums of the (optionally transformed) outcomes over all runs.
     sum_y = zeros(K, dtype=float32)
     # x_kr holds one row of transformed outcomes per iteration.
     x_kr = zeros((number_of_iterations, K), dtype=float32)
     for i in range(number_of_iterations):
         y_r = function()
         x_kr[i, :] = try_transformation(y_r, transformation)
         sum_y = sum_y + x_kr[i, :]
     # Per-cell rate estimate under the alternative hypothesis.
     lambdak = sum_y / float(number_of_iterations)
     # Rate under the null hypothesis: the expected results, transformed alike.
     lambdanull = try_transformation(expected_results.astype(float32),
                                     transformation)
     sumxk = sum(x_kr, axis=0)
     # Likelihood-ratio test statistic; cells with lambdanull == 0 are
     # masked so the division inside the log does not blow up.
     LRTS = 2.0 * (
         (number_of_iterations * (lambdanull - lambdak).sum()) +
         (ln(lambdak / ma.masked_where(lambdanull == 0, lambdanull)) *
          sumxk).sum())
     # Asymptotically LRTS ~ chi-square with K degrees of freedom.
     prob = chisqprob(LRTS, K)
     logger.log_status(
         "Stochastic Test Poisson: LRTS=" + str(LRTS) + ", df=", str(K),
         ", p=" + str(prob))
     return (prob >= significance_level,
             "prob=%f < significance level of %f" %
             (prob, significance_level))
 def generate_posterior_distribution(self, year, quantity_of_interest, cache_directory=None,
                                     values=None, ids=None,
                                     procedure="opus_core.bm_normal_posterior",
                                     use_bias_and_variance_from=None, transformed_back=True,
                                     transformation_pair=(None, None), **kwargs):
     """Generate the posterior distribution of quantity_of_interest for the
     given year.

     Either cache_directory or both values and ids must be supplied;
     cache_directory takes precedence.  The posterior is produced by the
     model component named by 'procedure'; extra keyword arguments are
     passed to its run() method.  When transformed_back is True and a
     forward transformation is set for the prediction, the inverse
     transformation is applied to the simulated values before returning.
     """
     if cache_directory is not None:
         self.cache_set = array([cache_directory])
     elif values is None or ids is None:
         # Call-form raise (the old comma-form statement is Python-2-only
         # syntax).
         raise StandardError("Either cache_directory or values and ids must be given.")

     self.set_posterior(year, quantity_of_interest, values=values, ids=ids,
                        use_bias_and_variance_from=use_bias_and_variance_from,
                        transformation_pair=transformation_pair)
     procedure_class = ModelComponentCreator().get_model_component(procedure)
     self.simulated_values = procedure_class.run(self, **kwargs)
     if transformed_back and (self.transformation_pair_for_prediction[0] is not None): # need to transform back
         self.simulated_values = try_transformation(self.simulated_values,
                                                    self.transformation_pair_for_prediction[1])
     return self.simulated_values
 def generate_posterior_distribution(self, year, quantity_of_interest, procedure="opus_core.bm_normal_posterior",
                                     use_bias_and_variance_from=None, transformed_back=True, aggregate_to=None,
                                     intermediates=None, **kwargs):
     """
     'quantity_of_interest' is a variable name about which we want to get the posterior distribution.
     If there is multiple known_output, it must be made clear from which one the bias and variance
     is to be used (argument use_bias_and_variance_from) If it is None, the first known output is used.

     Extra keyword arguments are passed to the run() method of the model
     component named by 'procedure'.  When aggregate_to is given, the
     result is aggregated to that dataset, optionally through
     'intermediates' (a list of intermediate dataset names; defaults to an
     empty list).
     """
     # None sentinel instead of a mutable [] default, which would be shared
     # across calls.
     if intermediates is None:
         intermediates = []
     self.set_posterior(year, quantity_of_interest, use_bias_and_variance_from)
     procedure_class = ModelComponentCreator().get_model_component(procedure)
     self.simulated_values = procedure_class.run(self, **kwargs)
     if transformed_back and (self.transformation_pair_for_prediction[0] is not None): # need to transform back
         self.simulated_values = try_transformation(self.simulated_values,
                                                    self.transformation_pair_for_prediction[1])
     if aggregate_to is not None:
         self.simulated_values = self.aggregate(self.simulated_values,
                                                aggregate_from=VariableName(quantity_of_interest).get_dataset_name(),
                                                aggregate_to=aggregate_to, intermediates=intermediates)

     return self.simulated_values
Exemplo n.º 17
0
    def generate_posterior_distribution(self, year, quantity_of_interest, cache_directory=None, values=None, ids=None,
                                        procedure="opus_core.bm_normal_posterior", use_bias_and_variance_from=None,
                                        transformed_back=True, aggregate_to=None,
                                        intermediates=None, propagation_factor=1, no_propagation=True, additive_propagation=False,
                                        omit_bias=False, **kwargs):
        """Generate the posterior distribution of quantity_of_interest for
        the given year.

        values and ids are required when the object was initialized without
        a cache file location (self.cache_set is None).  The posterior is
        produced by the model component named by 'procedure'; extra keyword
        arguments are passed to its run() method.  The result may be
        transformed back (inverse transformation) and aggregated to the
        dataset named by aggregate_to, optionally through 'intermediates'
        (defaults to an empty list).  NOTE(review): cache_directory is
        accepted but not referenced in this method body.
        """
        # None sentinel instead of a mutable [] default, which would be
        # shared across calls.
        if intermediates is None:
            intermediates = []
        if (values is None or ids is None) and (self.cache_set is None):
            # Call-form raise (the old comma-form statement is Python-2-only
            # syntax); also fixes the 'must be give' typo in the message.
            raise StandardError(
                "values and ids must be given if the BM object is initialized without cache_file_location.")

        self.set_posterior(year, quantity_of_interest, values=values, ids=ids, use_bias_and_variance_from=use_bias_and_variance_from,
                           propagation_factor=propagation_factor,
                           no_propagation=no_propagation, additive_propagation=additive_propagation, omit_bias=omit_bias)
        procedure_class = ModelComponentCreator().get_model_component(procedure)
        self.simulated_values = procedure_class.run(self, **kwargs)
        if transformed_back and (self.transformation_pair_for_prediction[0] is not None): # need to transform back
            self.simulated_values = try_transformation(self.simulated_values,
                                                       self.transformation_pair_for_prediction[1])
        self.simulated_values_ids = self.m_ids
        if aggregate_to is not None:
            (self.simulated_values, self.simulated_values_ids) = self.aggregate(self.simulated_values,
                                                   aggregate_from=VariableName(quantity_of_interest).get_dataset_name(),
                                                   aggregate_to=aggregate_to, intermediates=intermediates)
        return self.simulated_values
Exemplo n.º 18
0
 def get_predicted_values(self, transformed_back=False):
     """Return the prediction matrix self.m.  When transformed_back is True
     and a forward transformation is set for the prediction, the inverse
     transformation is applied first."""
     pair = self.transformation_pair_for_prediction
     if transformed_back and pair[0] is not None:
         return try_transformation(self.m, pair[1])
     return self.m
Exemplo n.º 19
0
 def get_transformed_values(self):
     """Return self.get_values() with self.transformation applied."""
     raw_values = self.get_values()
     return try_transformation(raw_values, self.transformation)
Exemplo n.º 20
0
 def get_observed_data_by_index(self, index, transformed_back=True):
     """Return the observed data self.y[index].  When transformed_back is
     True and the quantity at this index has a forward transformation, the
     quantity's inverse transformation is applied first."""
     quantity = self.observed_data.get_quantity_object_by_index(index)
     transformation, inverse = quantity.get_transformation_pair()
     if not transformed_back or transformation is None:
         return self.y[index]
     return try_transformation(self.y[index], inverse)
Exemplo n.º 21
0
 def get_data_for_quantity(self, transformed_back=True):
     """Return the observed data self.y for the quantity selected by
     self.use_bias_and_variance_index.  When transformed_back is True and
     the quantity has a forward transformation, the quantity's inverse
     transformation is applied first."""
     idx = self.use_bias_and_variance_index
     quantity = self.observed_data.get_quantity_object_by_index(idx)
     transformation, inverse = quantity.get_transformation_pair()
     if not transformed_back or transformation is None:
         return self.y[idx]
     return try_transformation(self.y[idx], inverse)
Exemplo n.º 22
0
 def _get_m_from_values(self, values, ids):
     """Store the given values and ids as the prediction matrix self.m and
     its ids self.m_ids.  A 1-D values vector is reshaped into a single
     column; the forward transformation of
     self.transformation_pair_for_prediction is applied to the matrix."""
     self.m_ids = ids
     matrix = values
     if matrix.ndim < 2:
         # Promote a flat vector to an (size, 1) column matrix.
         matrix = resize(matrix, (matrix.size, 1))
     self.m = try_transformation(
         matrix, self.transformation_pair_for_prediction[0])