Пример #1
0
    def write_to_disk(self, path, cols):
        """Write the first ``cols`` columns of this object's arrays to a database.

        Any existing file at ``path`` is removed first, so the output always
        starts from an empty database. Five tables are written:

        * ``data``   -- ``data.euclidean_to_hypercube`` applied to the first
          ``cols`` columns of ``self.G``; columns ``V_0 .. V_{cols-1}``.
        * ``alphas`` -- first ``cols`` columns of ``self.alpha``.
        * ``betas``  -- first ``cols`` columns of ``self.beta``.
        * ``deltas`` -- ``self.delta`` as a single row with ``self.nDat``
          columns.
        * ``ps``     -- ``self.p`` as a single row with ``self.nMix`` columns.

        Args:
            path: Location of the database file to (re)create.
            cols: Number of leading columns to keep from ``G``, ``alpha`` and
                ``beta``.
        """
        parent = os.path.dirname(path)
        # A bare filename has an empty parent; only create a directory when
        # there is one, and create intermediate directories as needed.
        if parent:
            os.makedirs(parent, exist_ok=True)
        if os.path.exists(path):
            os.remove(path)

        # Build every frame before opening the connection so a shape error
        # does not leave an open handle behind.
        tables = [
            ('data', pd.DataFrame(
                data.euclidean_to_hypercube(self.G.T[:cols].T),
                columns=['V_{}'.format(i) for i in range(cols)],
            )),
            ('alphas', pd.DataFrame(
                self.alpha.T[:cols].T,
                columns=['alpha_{}'.format(i) for i in range(cols)],
            )),
            ('betas', pd.DataFrame(
                self.beta.T[:cols].T,
                columns=['beta_{}'.format(i) for i in range(cols)],
            )),
            ('deltas', pd.DataFrame(
                self.delta.reshape(1, -1),
                columns=['delta_{}'.format(i) for i in range(self.nDat)],
            )),
            ('ps', pd.DataFrame(
                self.p.reshape(1, -1),
                columns=['p_{}'.format(i) for i in range(self.nMix)],
            )),
        ]

        # NOTE(review): `sql` is presumably the sqlite3 module -- confirm
        # against the file's imports.
        conn = sql.connect(path)
        try:
            for table_name, frame in tables:
                frame.to_sql(table_name, conn, index=False)
            conn.commit()
        finally:
            # Always release the connection, even if a write fails.
            conn.close()
Пример #2
0
 def populate_cones(self, epsilon):
     """Tabulate how often each exceedance pattern occurs in the predictive draws.

     The posterior predictive gammas are passed through
     ``euclidean_to_hypercube`` and each row is reduced to the tuple of
     booleans ``row > epsilon``. Every occurrence of a pattern adds
     ``1 / number_of_rows`` to that pattern's entry.

     Returns:
         A ``defaultdict`` keyed by boolean tuples. Patterns never observed
         read as ``1e-10``, and observed patterns carry that same offset.
     """
     predictive = euclidean_to_hypercube(
         self.generate_posterior_predictive_gammas())
     n_draws = predictive.shape[0]
     cone_mass = defaultdict(lambda: 1e-10)
     for pattern in map(tuple, predictive > epsilon):
         # Division stays inside the loop so an empty sample never divides.
         cone_mass[pattern] += 1 / n_draws
     return cone_mass
Пример #3
0
 def generate_posterior_predictive_hypercube(self, n_per_sample=1):
     """Draw posterior predictive points and project them with
     ``euclidean_to_hypercube``.

     For each of the ``self.nSamp`` stored samples, ``n_per_sample`` mixture
     components are chosen with probabilities ``self.samples.pi[n]``, gamma
     variates are drawn with the chosen rows of ``self.samples.zeta[n]`` as
     shape parameters, and the draws are passed through
     ``euclidean_to_simplex``.

     Args:
         n_per_sample: Number of predictive draws per stored sample.

     Returns:
         ``euclidean_to_hypercube`` applied to the stacked draws, which have
         ``self.nSamp * n_per_sample`` rows and ``self.nCol`` columns.
     """
     draw_shape = (n_per_sample, self.nCol)
     draws = np.empty((self.nSamp,) + draw_shape)
     for it in range(self.nSamp):
         components = choice(self.nMix, n_per_sample, p=self.samples.pi[it])
         shapes = self.samples.zeta[it, components]
         draws[it] = euclidean_to_simplex(gamma(shape=shapes, size=draw_shape))
     # Collapse the (sample, draw) axes into one row axis before projecting.
     stacked = draws.reshape(self.nSamp * n_per_sample, self.nCol)
     return euclidean_to_hypercube(stacked)
Пример #4
0
 def hypercube_distance(self):
     """Compute ``hypercube_distance_matrix`` between predictive draws and data.

     The data side is built from the means over axis 0 of ``self.r`` and
     ``self.rho``: the mean of ``r`` scales the rows of ``self.data.Yp``,
     the mean of ``rho`` is appended as extra columns, and the result is
     passed through ``euclidean_to_hypercube``.

     Returns:
         Whatever ``hypercube_distance_matrix`` returns for the predictive
         gammas, the projected data, and ``self.pool``.
     """
     radius_mean = self.r.mean(axis=0)
     rho_mean = self.rho.mean(axis=0)
     scaled = radius_mean[:, None] * self.data.Yp
     observed = euclidean_to_hypercube(np.hstack((scaled, rho_mean)))
     # NOTE(review): the predictive gammas are passed without a
     # euclidean_to_hypercube projection here -- confirm that
     # hypercube_distance_matrix expects them in that form.
     predictive = self.generate_posterior_predictive_gammas()
     return hypercube_distance_matrix(predictive, observed, self.pool)
Пример #5
0
 def generate_posterior_predictive_hypercube(self, n_per_sample=1):
     """Draw posterior predictive gammas and project them with
     ``euclidean_to_hypercube``.

     For each of the ``self.nSamp`` stored samples, ``n_per_sample`` mixture
     components are chosen with probabilities ``self.samples.p[n]``. Gamma
     variates are then drawn with shape taken from the chosen rows of
     ``self.samples.alpha[n]`` and scale ``1 / beta`` from the chosen rows
     of ``self.samples.beta[n]``.

     Args:
         n_per_sample: Number of predictive draws per stored sample.

     Returns:
         The projected draws reshaped to two dimensions with ``self.nCol``
         columns.
     """
     draw_shape = (n_per_sample, self.nCol)
     draws = np.empty((self.nSamp,) + draw_shape)
     for it in range(self.nSamp):
         components = choice(self.nMix, n_per_sample, p=self.samples.p[it])
         shape_par = self.samples.alpha[it][components]
         rate_par = self.samples.beta[it][components]
         raw = gamma(shape=shape_par, scale=1 / rate_par, size=draw_shape)
         draws[it] = euclidean_to_hypercube(raw)
     return draws.reshape(-1, self.nCol)
Пример #6
0
 def cone_density(self, epsilon=0.5, **kwargs):
     """Score each observation by the estimated mass of its exceedance pattern.

     ``self.populate_cones(epsilon)`` supplies a mapping from boolean
     tuples to masses. Each row of the observation matrix is reduced to
     ``tuple(row > epsilon)`` and looked up in that mapping.

     When ``self.samples`` provides ``r`` and ``rho``, the observation
     matrix is ``euclidean_to_hypercube`` applied to ``self.data.V`` scaled
     row-wise by the mean of ``r`` with the mean of ``rho`` appended as
     extra columns. Otherwise ``self.data.V`` is used unchanged.

     Args:
         epsilon: Threshold applied element-wise to both the predictive
             draws and the observations.
         **kwargs: Accepted and ignored.

     Returns:
         A float array with one score per row of the observation matrix.
     """
     cone_prob = self.populate_cones(epsilon)
     # Only the attribute lookups are guarded, so an AttributeError raised
     # inside the projection itself is no longer silently swallowed.
     try:
         r_mean = self.samples.r.mean(axis=0)
         rho_mean = self.samples.rho.mean(axis=0)
     except AttributeError:
         Y = self.data.V
     else:
         Y = euclidean_to_hypercube(
             np.hstack((r_mean[:, None] * self.data.V, rho_mean)))
     # Size the result from the rows actually scored, so every entry is
     # initialised even if the stored row counts disagree.
     return np.array(
         [cone_prob[tuple(row > epsilon)] for row in Y], dtype=float)
Пример #7
0
 def generate_posterior_predictive_hypercube(self, n_per_sample=1, m=10):
     """Project predictive gammas block by block.

     The first ``self.nCol`` columns go through ``euclidean_to_hypercube``.
     The remaining column blocks are delimited by the indices where
     ``self.sigma_unity`` is truthy, skipping the first such index. Each
     block runs to the next such index, or to the end of ``sigma_unity``
     for the last one, and goes through ``euclidean_to_simplex``.

     Args:
         n_per_sample: Forwarded to
             ``generate_posterior_predictive_gammas``.
         m: Forwarded to ``generate_posterior_predictive_gammas``.

     Returns:
         The hypercube block and all simplex blocks stacked horizontally.
     """
     gammas = self.generate_posterior_predictive_gammas(n_per_sample, m)
     blocks = [euclidean_to_hypercube(gammas[:, :self.nCol])]
     starts = np.where(self.sigma_unity)[0][1:]
     # Each block ends where the next begins; the last one runs to the end.
     ends = np.append(starts[1:], self.sigma_unity.shape[0])
     blocks.extend(
         euclidean_to_simplex(gammas[:, lo:hi])
         for lo, hi in zip(starts, ends)
     )
     return np.hstack(blocks)
Пример #8
0
 def hypercube_distance_latent(self):
     """Distances between predictive draws and per-observation conditional draws.

     Conditional predictive radii scale ``self.data.V`` and are joined with
     the columns of the conditional predictive gammas from ``self.nCol``
     onward. Each observation's stack of draws is projected with
     ``euclidean_to_hypercube``. A random half of the draws (without
     replacement) is then compared against the projected unconditional
     predictive sample using ``hypercube_dmat`` through ``self.pool.map``.

     Shape comments below follow the original author's annotations
     (s = samples, n = observations, d = columns).

     Returns:
         The mapped results collected into an array.
     """
     radii = self.generate_conditional_posterior_predictive_radii()  # (s, n)
     real_part = radii[:, :, None] * self.data.V[None, :, :]  # (s, n, d1)
     cond_gammas = self.generate_conditional_posterior_predictive_gammas()
     extra_part = cond_gammas[:, :, self.nCol:]  # (s, n, d2)
     joined = np.concatenate((real_part, extra_part), axis=2)
     per_obs = np.swapaxes(joined, 0, 1)  # (n, s, d)
     V_con = np.array([euclidean_to_hypercube(draws) for draws in per_obs])
     V_new = euclidean_to_hypercube(
         self.generate_posterior_predictive_gammas())
     n_new = V_new.shape[0]
     # NOTE(review): the subset is sized from V_new's rows but indexes
     # V_con's sample axis -- confirm the two always have equal length.
     subset = np.random.choice(n_new, n_new // 2, False)
     pairs = zip(repeat(V_new), V_con[:, subset])
     return np.array(list(self.pool.map(hypercube_dmat, pairs)))
Пример #9
0
 def generate_posterior_predictive_hypercube(self, n_per_sample=1, m=10):
     """Project predictive gammas: hypercube for the leading columns,
     simplex per category for the trailing ones.

     The first ``self.nCol`` columns go through ``euclidean_to_hypercube``.
     The category blocks are laid out from the right: the last category
     ends at column ``self.nCol + self.nCat`` and each has the width given
     by its entry in ``self.cats``. Each block goes through
     ``euclidean_to_simplex``.

     Args:
         n_per_sample: Forwarded to
             ``generate_posterior_predictive_gammas``.
         m: Forwarded to ``generate_posterior_predictive_gammas``.

     Returns:
         The hypercube block followed by the simplex blocks in category
         order, stacked horizontally.

     Raises:
         IndexError: If the category widths add up to more columns than
             ``self.nCol + self.nCat``.
     """
     gammas = self.generate_posterior_predictive_gammas(n_per_sample, m)
     # hypercube transformation for real variates
     hypcube = euclidean_to_hypercube(gammas[:, :self.nCol])
     # simplex transformation for categorical variates, last category first
     simplex_reverse = []
     cat_end = self.nCol + self.nCat
     for cat_length in self.cats[::-1]:
         # Computing the start arithmetically keeps it defined for a
         # single-level category, where counting back one index at a time
         # would never assign it.
         cat_start = cat_end - int(cat_length)
         if cat_start < 0:
             raise IndexError(
                 'category widths exceed the available columns')
         simplex_reverse.append(
             euclidean_to_simplex(gammas[:, cat_start:cat_end]))
         cat_end = cat_start
     # stack hypercube and categorical variables side by side.
     return np.hstack([hypcube] + simplex_reverse[::-1])
Пример #10
0
    def write_to_disk(self, path, nCol):
        """Write simulated observations and labels as two CSV files in ``path``.

        The first ``nCol`` columns of ``self.G`` are passed through
        ``data.euclidean_to_hypercube``. Each row is then scaled by a
        ``pareto(1)`` draw multiplied by ``1 + 0.3 * self.y``. The result is
        written to ``ad_sim_m{nMix}_c{nCol}_x.csv`` with columns
        ``Z_0 .. Z_{nCol-1}``, and ``self.y`` is written to
        ``ad_sim_m{nMix}_c{nCol}_y.csv`` with a single ``y`` column.

        Args:
            path: Output directory; created, with any missing parents, if
                it does not exist.
            nCol: Number of leading columns of ``self.G`` to keep.
        """
        # makedirs handles nested paths and an already-existing directory,
        # with no window between an exists check and the creation.
        os.makedirs(path, exist_ok=True)

        V = data.euclidean_to_hypercube(self.G.T[:nCol].T)
        # NOTE(review): `pareto` is presumably numpy.random.pareto --
        # confirm against the file's imports.
        R = pareto(1, size=self.nSamp) * (np.ones(self.nSamp) + 0.3 * self.y)

        # Scale each row of V by its own radius.
        Z = (V.T * R).T

        Z_df = pd.DataFrame(Z, columns=['Z_{}'.format(i) for i in range(nCol)])
        y_df = pd.DataFrame({'y': self.y})

        stem = 'ad_sim_m{}_c{}'.format(self.nMix, nCol)
        Z_df.to_csv(os.path.join(path, stem + '_x.csv'), index=False)
        y_df.to_csv(os.path.join(path, stem + '_y.csv'), index=False)
Пример #11
0
 def generate_posterior_predictive_hypercube(self, n_per_sample=1, m=10):
     """Return the predictive gammas passed through ``euclidean_to_hypercube``.

     Args:
         n_per_sample: Forwarded to
             ``generate_posterior_predictive_gammas``.
         m: Forwarded to ``generate_posterior_predictive_gammas``.
     """
     return euclidean_to_hypercube(
         self.generate_posterior_predictive_gammas(n_per_sample, m))
Пример #12
0
 def generate_posterior_predictive_hypercube(self, n_per_sample=1):
     """Return the predictive gammas passed through ``euclidean_to_hypercube``.

     Args:
         n_per_sample: Forwarded to
             ``generate_posterior_predictive_gammas``.
     """
     return euclidean_to_hypercube(
         self.generate_posterior_predictive_gammas(n_per_sample))
Пример #13
0
 def hypercube_distance_real(self):
     """Compute ``hypercube_distance_matrix`` using only the leading columns.

     The first ``self.nCol`` columns of the posterior predictive gammas are
     passed through ``euclidean_to_hypercube`` and compared against
     ``self.data.V``.

     Returns:
         Whatever ``hypercube_distance_matrix`` returns for the projected
         draws, ``self.data.V``, and ``self.pool``.
     """
     gammas = self.generate_posterior_predictive_gammas()
     leading = gammas[:, :self.nCol]
     predictive = euclidean_to_hypercube(leading)
     return hypercube_distance_matrix(predictive, self.data.V, self.pool)