def print_value(self):
    # MAX-AND values are reported as tab-separated percentages; all other
    # models report the sigmoid-transformed mean.
    if self.layer.model == 'MAX-AND':
        return '\t'.join('%.1f' % (100 * x) for x in self.val)
    return str(round(lib.expit(np.mean(self.val)), 3))
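
A minimal sketch of how this method could be exercised, assuming a
hypothetical stand-in for the parameter object it belongs to (a layer.model
attribute and a val array). lib.expit is assumed to be the logistic sigmoid,
so scipy.special is aliased as lib for the demo; everything named here is
illustrative, not part of the original library.

import numpy as np
import scipy.special as lib  # assumption: lib.expit is the logistic sigmoid
from types import SimpleNamespace


class DemoParameter:
    """Illustrative stand-in for the parameter object this method lives on."""

    def __init__(self, model, val):
        self.layer = SimpleNamespace(model=model)
        self.val = val


DemoParameter.print_value = print_value  # attach the method defined above

print(DemoParameter('MAX-AND', np.array([0.91, 0.05])).print_value())
# -> 91.0    5.0  (tab-separated percentages)
print(DemoParameter('OR-AND', np.array([2.0, 2.0])).print_value())
# -> 0.881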
Example #2

import numpy as np
from scipy.special import expit, logit, gammaln

# get_posterior_score_fct, flip_gibbs_numba and lom_outputs are internals
# of the surrounding library and are assumed to be in scope here.


def sample_2d_IBP(Z, U, X, lbda, q, alpha):
    """
    IBP update procedure for the 2D OrMachine, drawing U and Z, where U
    has a flat prior and Z comes from an IBP with concentration
    parameter alpha.
    Shapes: Z[n, l], U[d, l], X[n, d].
    q: Bernoulli prior on entries of new columns of U;
    alpha: IBP concentration parameter.
    """

    L_new_max = 3  # maximum number of new dishes to consider per customer
    N, L = Z.shape
    D, _ = U.shape
    posterior_score_fct = get_posterior_score_fct('OR_AND_2D')

    # Pre-compute log-likelihood contributions of false-negative and
    # true-negative data points for a range of numbers of new dishes L',
    # assuming a simple Bernoulli(q) prior on the entries of U.
    FN_factor = [
        np.log((expit(lbda) * (1 - 2 * (q**L_temp))) + (q**L_temp))
        for L_temp in range(L_new_max)
    ]
    TN_factor = [
        np.log((expit(-lbda) * (1 - 2 * (q**L_temp))) + (q**L_temp))
        for L_temp in range(L_new_max)
    ]
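
    # Where these factors come from (a reading of the code above, not taken
    # from the original source): with L' new dishes all active for customer
    # n, the OR-AND prediction for x[n, d] remains -1 only if all L' new
    # entries of U[d, :] equal -1, which has probability q**L'.
    # Marginalising over the new entries of U gives
    #     P(x = 1) = (1 - q**L') * expit(lbda) + q**L' * expit(-lbda)
    #              = expit(lbda) * (1 - 2 * q**L') + q**L',
    # which is exactly the expression inside the logs above; TN_factor is
    # the analogous expression for P(x = -1).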

    for n in range(N):

        # how often is each dish ordered by other customers
        m = (Z[np.arange(N) != n, :] == 1).sum(axis=0)
        columns_to_keep = np.ones(L, dtype=bool)

        for l in range(L):

            # dishes that have already been ordered
            if m[l] > 0:
                # draw z[n,l] as usual
                logit_score = lbda * posterior_score_fct(
                    Z[n, :], U, X[n, :], l)
                logit_prior = logit(m[l] / N)
                Z[n, l] = flip_gibbs_numba(expit(logit_score + logit_prior))

            elif m[l] == 0:
                # mark columns for removal
                columns_to_keep[l] = False

        # remove marked columns
        Z = Z[:, columns_to_keep]
        U = U[:, columns_to_keep]
        L = columns_to_keep.sum()

        # Draw the number of new dishes (columns): compute the log
        # probability of L' over a range of values. First get the current
        # predictions for row n.
        n_predict = lom_outputs.OR_AND_single_n(Z[n, :], U)

        # count true negatives / false negatives among the predictions
        n_predict = np.asarray(n_predict)
        TN = ((X[n, :] == -1) * (n_predict == -1)).sum()
        FN = ((X[n, :] == 1) * (n_predict == -1)).sum()

        lik_L_new = [
            TN * TN_factor[L_temp] + FN * FN_factor[L_temp]
            for L_temp in range(L_new_max)
        ]
        # Poisson(alpha / N) prior log-pmf.
        # TODO: should this be evaluated at L_new or at L + L_new?
        prior_L_new = [(L_temp + L) * np.log(alpha / N) - (alpha / N) -
                       gammaln(L + L_temp + 1) for L_temp in range(L_new_max)]
        log_L_new = [
            loglik + logprior
            for loglik, logprior in zip(lik_L_new, prior_L_new)
        ]
        # map to probabilities, subtracting the max for numerical stability
        p_L_new = np.exp(np.array(log_L_new) - np.max(log_L_new))
        p_L_new /= p_L_new.sum()

        L_new = np.random.choice(range(L_new_max), p=p_L_new)

        if L_new > 0:

            # add new columns to Z; customer n orders all new dishes
            Z = np.hstack(
                [Z, np.full([N, L_new], fill_value=-1, dtype=np.int8)])
            Z[n, -L_new:] = 1
            U = np.hstack(
                [U, np.full([D, L_new], fill_value=-1, dtype=np.int8)])

            # sample the new hidden causes
            for l in range(L, L + L_new):
                for d in range(D):
                    logit_score = lbda * posterior_score_fct(
                        U[d, :], Z, X[:, d], l)
                    U[d, l] = flip_gibbs_numba(expit(logit_score))

        L += L_new


    return Z, U
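
What follows is a minimal, hedged sketch of how this sampler might be driven
on toy data. The shapes, hyperparameter values and number of sweeps are
illustrative only, and it assumes sample_2d_IBP plus the library internals it
calls (get_posterior_score_fct, flip_gibbs_numba, lom_outputs) are in scope.

import numpy as np

rng = np.random.default_rng(0)

# Toy binary data in {-1, 1}: N = 20 customers, D = 10 dimensions.
X = (2 * rng.integers(0, 2, size=(20, 10)) - 1).astype(np.int8)

# Start from a single latent dimension; factors are also coded in {-1, 1}.
Z = (2 * rng.integers(0, 2, size=(20, 1)) - 1).astype(np.int8)
U = (2 * rng.integers(0, 2, size=(10, 1)) - 1).astype(np.int8)

for _ in range(50):  # Gibbs sweeps; the IBP grows and shrinks L as it goes
    Z, U = sample_2d_IBP(Z, U, X, lbda=2.0, q=0.5, alpha=1.0)

print('inferred latent dimensions:', Z.shape[1])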

Example #3

    def output(self,
               technique='factor_map',
               noisy_emission=False,
               lazy=False,
               map_to_probabilities=True):
        """
        Compute output matrix from posterior samples.
        Valid techniques are:
            - 'point_estimate'
                output of the current state of factors
            - 'MC' TODO
                'probabilistic output from the MC trace'
            - 'Factor-MAP' TODO
                From the posterior MAP of factors
            - 'Factor-MEAN'
                Computed from posterior mean of factors
        TODO: compute this in a lazy fashion
        Note, that outputs are always probabilities in (0,1)
        """

        # return the previously computed value
        if isinstance(self.prediction, np.ndarray) and lazy is True:
            print('Returning previously computed value, '
                  'ignoring the requested technique.')
            return self.prediction

        # otherwise compute
        if self.model == 'MAX-AND':
            if technique == 'point_estimate':
                out = lom_outputs.MAX_AND_product_2d(
                    [x() for x in self.factors], self.lbda())
            elif technique == 'factor_map':
                out = lom_outputs.MAX_AND_product_2d([
                    np.array(2 * (x.mean() > 0) - 1, dtype=np.int8)
                    for x in self.factors
                ], self.lbda())
            elif technique == 'mc':
                out = np.zeros([x().shape[0] for x in self.factors])
                for t in range(self.lbda.trace.shape[0]):
                    out += lom_outputs.MAX_AND_product_2d(
                        [x.trace[t, :] for x in self.factors],
                        self.lbda.trace[t])
                out /= self.lbda.trace.shape[0]
            elif technique == 'factor_mean':
                out = lom_outputs_fuzzy.MAX_AND_product_fuzzy(
                    .5 * (self.z.mean() + 1), .5 * (self.u.mean() + 1),
                    self.lbda.mean())

        else:
            if technique == 'point_estimate':
                out = aux.lom_generate_data_fast([x() for x in self.factors],
                                                 self.model)
                out = (1 + out) * .5  # map to probability of emitting a 1

            elif technique == 'factor_map':
                out = aux.lom_generate_data_fast(
                    [2 * (x.mean() > 0) - 1 for x in self.factors], self.model)
                out = np.array(
                    out == 1,
                    dtype=np.int8)  # map to probability of emitting a 1

            elif technique == 'factor_mean':
                # output does not need to be mapped to probabilities
                out = aux.lom_generate_data_fast(
                    [(x.mean() + 1) * .5
                     for x in self.factors],  # map to (0,1)
                    self.model,
                    fuzzy=True)

            elif technique == 'factor_mean_old':
                out = aux.lom_generate_data_fuzzy(
                    [x.mean() for x in self.factors], self.model)

            elif technique == 'mc':  # TODO numba
                out = np.zeros([x().shape[0] for x in self.factors])

                for t in range(self.lbda.trace.shape[0]):
                    out += aux.lom_generate_data_fast(
                        [x.trace[t, :] for x in self.factors], self.model)
                out /= self.lbda.trace.shape[0]
                out = (1 + out) * .5  # map to probability of emitting a 1

            # blend in the noise model: emit a 1 with probability
            # sigmoid(lbda) where the noise-free output is 1, and with
            # probability sigmoid(-lbda) where it is 0
            if noisy_emission is True:
                out = (out * aux.expit(self.lbda.mean()) +
                       (1 - out) * aux.expit(-self.lbda.mean()))

        self.prediction = out

        if map_to_probabilities is True:
            return out
        else:
            return 2 * out - 1
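
A hedged usage sketch: layer stands for an already-fitted layer object of
this library that exposes the output method above. Only the keyword
arguments are taken from the code itself; everything else is illustrative.

# Posterior-mean reconstruction as probabilities in (0, 1).
probs = layer.output(technique='factor_mean')

# MAP of the factors, with the noise model blended in.
probs_noisy = layer.output(technique='factor_map', noisy_emission=True)

# The same prediction mapped back to the {-1, 1} coding of the data.
pm1 = layer.output(technique='factor_map', map_to_probabilities=False)

# A repeated lazy call returns the cached prediction, whatever the technique.
cached = layer.output(lazy=True)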