Example #1
File: m1a.py  Project: yorkerlin/ep-stan
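Note: this method is excerpted from a larger class, so the snippet is not runnable on its own. np is NumPy, and cholesky, rand_corr_vine, calc_input_param_lin_reg, the data container and the upper-case constants (SIGMA, SIGMA_H, SIGMA_A, SIGMA_AH, BETA, SIGMA_B, B_ABS_MIN_SUM) are presumably imported or defined at module level in m1a.py.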
 def simulate_data(self, Sigma_x=None, seed=None):
     """Simulate data from the model.
     
     Returns a models.common.data instance.
     
     Parameters
     ----------
     Sigma_x : {None, 'rand', ndarray}
         The covariance structure of the explanatory variable. This is
         scaled to regulate the uncertainty. If not provided or None, the
         identity matrix is used. Passing the string 'rand' uses
         common.rand_corr_vine to randomise one.
     
     """
     # Localise params
     J = self.J
     D = self.D
     npg = self.npg
     
     # Set seed
     rnd_data = np.random.RandomState(seed=seed)
     # Draw a random seed for the input covariance unconditionally so that
     # the subsequent random draws stay consistent even when it is not needed
     seed_input_cov = rnd_data.randint(2**31-1)
     
     # Randomise input covariance structure if needed
     if isinstance(Sigma_x, str) and Sigma_x == 'rand':
         Sigma_x = rand_corr_vine(D, seed=seed_input_cov)
     
     # Parameters
     # Number of observations for each group
     if hasattr(npg, '__getitem__') and len(npg) == 2:
         Nj = rnd_data.randint(npg[0],npg[1]+1, size=J)
     else:
         Nj = npg*np.ones(J, dtype=np.int64)
     # Total number of observations
     N = np.sum(Nj)
     # Observation index limits for J groups
     j_lim = np.concatenate(([0], np.cumsum(Nj)))
     # Group indices for each sample
     j_ind = np.empty(N, dtype=np.int64)
     for j in range(J):
         j_ind[j_lim[j]:j_lim[j+1]] = j
     
     # Assign parameters
     if SIGMA is None:
         sigma = np.exp(rnd_data.randn()*SIGMA_H)
     else:
         sigma = SIGMA
     if SIGMA_A is None:
         sigma_a = np.exp(rnd_data.randn()*SIGMA_AH)
     else:
         sigma_a = SIGMA_A
     if BETA is None:
         beta = rnd_data.randn(D)*SIGMA_B
     else:
         beta = BETA
     
     # Regulate beta: resample single elements until the coefficients do not sum to (nearly) zero
     beta_sum = np.sum(beta)
     while np.abs(beta_sum) < B_ABS_MIN_SUM:
         # Replace one random element in beta
         index = rnd_data.randint(D)
         beta_sum -= beta[index]
         beta[index] = rnd_data.randn()*SIGMA_B
         beta_sum += beta[index]
     
     alpha_j = rnd_data.randn(J)*sigma_a
     phi_true = np.empty(self.dphi)
     phi_true[0] = np.log(sigma)
     phi_true[1] = np.log(sigma_a)
     phi_true[2:] = beta
     
     # Determine suitable sigma_x
     sigma_x = calc_input_param_lin_reg(beta, sigma, Sigma_x)
     
     # Simulate data
     if Sigma_x is None:
         X = rnd_data.randn(N,D)*sigma_x
     else:
         cho_x = cholesky(Sigma_x)
         X = rnd_data.randn(N,D).dot(sigma_x*cho_x)
     y_true = alpha_j[j_ind] + X.dot(beta)
     y = y_true + rnd_data.randn(N)*sigma
     
     return data(
         X, y, {'sigma_x':sigma_x, 'Sigma_x':Sigma_x}, y_true, Nj, j_lim, 
         j_ind, {'phi':phi_true, 'alpha':alpha_j, 'beta':beta, 
         'sigma':sigma}
     )
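To see the structure of the simulated model in isolation, the following is a minimal standalone sketch of the same hierarchy: group-specific intercepts alpha_j, one weight vector beta shared by all groups, and Gaussian observation noise. The sizes and scales are placeholder values rather than the constants of m1a.py, so it illustrates the data layout (Nj, j_lim, j_ind) and the generative step, not the project's exact simulator.

import numpy as np

rng = np.random.RandomState(0)
J, D, npg = 4, 3, 10                          # groups, covariate dimension, observations per group

Nj = np.full(J, npg, dtype=np.int64)          # observations per group
j_lim = np.concatenate(([0], np.cumsum(Nj)))  # index limits of each group's block
j_ind = np.repeat(np.arange(J), Nj)           # group index of every observation

sigma, sigma_a = 1.0, 0.5                     # placeholder noise and intercept scales
alpha_j = rng.randn(J) * sigma_a              # group-specific intercepts
beta = rng.randn(D)                           # weights shared by all groups

X = rng.randn(Nj.sum(), D)                    # unit-variance inputs (Sigma_x = identity)
y_true = alpha_j[j_ind] + X.dot(beta)
y = y_true + rng.randn(Nj.sum()) * sigma

np.repeat(np.arange(J), Nj) produces the same j_ind array as the explicit loop over j_lim in the method above, and it also works when the group sizes Nj are drawn at random.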
Example #2
File: m4a.py  Project: amoliu/ep-stan
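As in Example #1, the snippet is not self-contained: it relies on NumPy (np), cholesky, rand_corr_vine, calc_input_param_lin_reg, the data container and module-level constants (here SIGMA, SIGMA_H, SIGMA_A, SIGMA_SA, MU_A, SIGMA_MA, SIGMA_SB, SIGMA_MB, B_ABS_MIN_SUM) presumably defined elsewhere in m4a.py.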
    def simulate_data(self, Sigma_x=None, seed=None):
        """Simulate data from the model.
        
        Returns a models.common.data instance.
        
        Parameters
        ----------
        Sigma_x : {None, 'rand', ndarray}
            The covariance structure of the explanatory variable. This is
            scaled to regulate the uncertainty. If not provided or None, the
            identity matrix is used. Passing the string 'rand' uses
            common.rand_corr_vine to randomise one.
        
        """
        # Localise params
        J = self.J
        D = self.D
        npg = self.npg

        # Set seed
        rnd_data = np.random.RandomState(seed=seed)
        # Draw a random seed for the input covariance unconditionally so that
        # the subsequent random draws stay consistent even when it is not needed
        seed_input_cov = rnd_data.randint(2 ** 31 - 1)

        # Randomise input covariance structure if needed
        if isinstance(Sigma_x, str) and Sigma_x == "rand":
            Sigma_x = rand_corr_vine(D, seed=seed_input_cov)

        # Parameters
        # Number of observations for each group
        if hasattr(npg, "__getitem__") and len(npg) == 2:
            Nj = rnd_data.randint(npg[0], npg[1] + 1, size=J)
        else:
            Nj = npg * np.ones(J, dtype=np.int64)
        # Total number of observations
        N = np.sum(Nj)
        # Observation index limits for J groups
        j_lim = np.concatenate(([0], np.cumsum(Nj)))
        # Group indices for each sample
        j_ind = np.empty(N, dtype=np.int64)
        for j in range(J):
            j_ind[j_lim[j] : j_lim[j + 1]] = j

        # Assign parameters
        if SIGMA is None:
            sigma = np.exp(rnd_data.randn() * SIGMA_H)
        else:
            sigma = SIGMA
        if SIGMA_A is None:
            sigma_a = np.exp(rnd_data.randn() * SIGMA_SA)
        else:
            sigma_a = SIGMA_A
        if MU_A is None:
            mu_a = rnd_data.randn() * SIGMA_MA
        else:
            mu_a = MU_A
        sigma_b = np.exp(rnd_data.randn(D) * SIGMA_SB)
        mu_b = rnd_data.randn(D) * SIGMA_MB
        alpha_j = mu_a + rnd_data.randn(J) * sigma_a
        beta_j = mu_b + rnd_data.randn(J, D) * sigma_b

        # Regulate beta: resample single elements of each group's vector until it does not sum to (nearly) zero
        for j in range(J):
            beta_sum = np.sum(beta_j[j])
            while np.abs(beta_sum) < B_ABS_MIN_SUM:
                # Replace one random element in beta
                index = rnd_data.randint(D)
                beta_sum -= beta_j[j, index]
                beta_j[j, index] = mu_b[index] + rnd_data.randn() * sigma_b[index]
                beta_sum += beta_j[j, index]

        phi_true = np.empty(self.dphi)
        phi_true[0] = np.log(sigma)
        phi_true[1] = mu_a
        phi_true[2] = np.log(sigma_a)
        phi_true[3 : 3 + D] = mu_b
        phi_true[3 + D :] = np.log(sigma_b)

        # Determine suitable sigma_x
        sigma_x_j = calc_input_param_lin_reg(beta_j, sigma, Sigma_x)

        # Simulate data
        # Different sigma_x for every group
        X = np.empty((N, D))
        if Sigma_x is None:
            for j in range(J):
                X[j_lim[j] : j_lim[j + 1], :] = rnd_data.randn(Nj[j], D) * sigma_x_j[j]
        else:
            cho_x = cholesky(Sigma_x)
            for j in range(J):
                X[j_lim[j] : j_lim[j + 1], :] = rnd_data.randn(Nj[j], D).dot(sigma_x_j[j] * cho_x)
        y_true = np.empty(N)
        for n in range(N):
            y_true[n] = alpha_j[j_ind[n]] + X[n].dot(beta_j[j_ind[n]])
        y = y_true + rnd_data.randn(N) * sigma

        return data(
            X,
            y,
            {"sigma_x": sigma_x_j, "Sigma_x": Sigma_x},
            y_true,
            Nj,
            j_lim,
            j_ind,
            {"phi": phi_true, "alpha": alpha_j, "beta": beta_j, "sigma": sigma},
        )
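Compared with Example #1, this variant draws a separate weight vector beta_j for every group around shared hyperparameters mu_b and sigma_b (and likewise the intercepts alpha_j around mu_a), and it computes a separate input scale per group. A minimal standalone sketch of that extra hierarchy level, again with placeholder scales instead of the module's constants:

import numpy as np

rng = np.random.RandomState(0)
J, D = 4, 3
mu_a, sigma_a = 0.0, 0.5                      # placeholder intercept hyperparameters
mu_b = rng.randn(D)                           # placeholder mean weight vector
sigma_b = np.exp(rng.randn(D) * 0.3)          # placeholder per-dimension weight scales

alpha_j = mu_a + rng.randn(J) * sigma_a       # one intercept per group
beta_j = mu_b + rng.randn(J, D) * sigma_b     # one weight vector per group

Nj = np.full(J, 10)                           # equal group sizes for simplicity
j_ind = np.repeat(np.arange(J), Nj)           # group index of every observation
X = rng.randn(Nj.sum(), D)
y = alpha_j[j_ind] + np.einsum('nd,nd->n', X, beta_j[j_ind]) + rng.randn(Nj.sum())

The einsum line computes the same per-observation dot products between X[n] and beta_j[j_ind[n]] as the explicit loop that fills y_true in the method above.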