Example #1
    def _ppl_log_single_document(self, docdata):
        # NOTE: potential underflow problem
        # unpack: doc id, word-token ids, and per-reaction (u, e) index arrays
        d, docToken, [doc_u, doc_e] = docdata
        # per-topic word probabilities: mixture of topic (phiT) and background (phiB) distributions weighted by pi
        prob_w_kv = (self.phiT * self.pi[1] + self.phiB * self.pi[0])
        ppl_w_k_log = -np.sum(np.log(prob_w_kv[:, docToken]), axis=1)
        ppl_w_k_scaled, ppl_w_k_constant = expConstantIgnore(
            -ppl_w_k_log, constant_output=True)  # (actual ppl^(-1))

        # per-reaction, per-topic emotion probabilities
        prob_e_mk = np.dot(self.psi[doc_u, :], self.eta)
        ppl_e_k_log = -np.sum(
            np.log(prob_e_mk[np.arange(doc_u.shape[0]), :, doc_e]), axis=0)
        ppl_e_k_scaled, ppl_e_k_constant = expConstantIgnore(
            -ppl_e_k_log, constant_output=True)  # (actual ppl^(-1))
        prob_k = self.theta  # topic distribution

        # perplexity of emotions given the words
        prob_e_m = probNormalize(
            np.tensordot(prob_e_mk,
                         np.multiply(prob_k, ppl_w_k_scaled),
                         axes=(1, 0)))
        ppl_e_log = -np.sum(np.log(prob_e_m[np.arange(doc_u.shape[0]), doc_e]))
        # perplexity of words given the emotions (the same prob_w applies at every position n)
        prob_w = probNormalize(
            np.tensordot(prob_w_kv,
                         np.multiply(prob_k, ppl_e_k_scaled),
                         axes=(0, 0)))
        ppl_w_log = -np.sum(np.log(prob_w[docToken]))
        # for both words & emoti
        try:
            ppl_log = -(np.log(
                np.inner(ppl_w_k_scaled, np.multiply(ppl_e_k_scaled, prob_k)))
                        + ppl_w_k_constant + ppl_e_k_constant)
        except FloatingPointError as e:
            raise e
        return ppl_w_log, ppl_e_log, ppl_log
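
These examples call two helpers, expConstantIgnore and probNormalize, that are defined elsewhere in the project. A minimal sketch of plausible implementations follows, assuming expConstantIgnore performs a log-sum-exp style rescaling (factor out the maximum before exponentiating, consistent with the constants being added back in log space above) and probNormalize divides by the sum along the last axis; the project's actual definitions may differ.

import numpy as np


def expConstantIgnore(x_log, constant_output=False):
    # Exponentiate after subtracting the maximum to avoid underflow.
    # The true value is exp(c) * scaled, i.e. log-value = c + log(scaled),
    # which is why the callers above add the returned constant back in log space.
    c = np.max(x_log)
    scaled = np.exp(x_log - c)
    return (scaled, c) if constant_output else scaled


def probNormalize(p, axis=-1):
    # Normalize non-negative weights so they sum to one along `axis`.
    p = np.asarray(p, dtype=np.float64)
    return p / np.sum(p, axis=axis, keepdims=True)
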
Example #2
    def _ppl_log_single_document_off_shell(self, docdata, display=False):
        # NOTE: potential underflow problem
        d, docToken, [doc_u, doc_e] = docdata
        prob_w_kv = (self.GLV["phiT"] * self.GLV["pi"][1] +
                     self.GLV["phiB"] * self.GLV["pi"][0])
        ppl_w_k_log = -np.sum(np.log(prob_w_kv[:, docToken]), axis=1)
        ppl_w_k_scaled, ppl_w_k_constant = expConstantIgnore(
            -ppl_w_k_log, constant_output=True)  # (actual ppl^(-1))

        prob_e_mk = np.dot(self.GLV["psi"][doc_u, :], self.GLV["eta"])
        ppl_e_k_log = -np.sum(
            np.log(prob_e_mk[np.arange(doc_u.shape[0]), :, doc_e]), axis=0)
        ppl_e_k_scaled, ppl_e_k_constant = expConstantIgnore(
            -ppl_e_k_log, constant_output=True)  # (actual ppl^(-1))
        prob_k = self.GLV["theta"]

        # perplexity of emotions given the words
        prob_e_m = probNormalize(
            np.tensordot(prob_e_mk,
                         np.multiply(prob_k, ppl_w_k_scaled),
                         axes=(1, 0)))
        ppl_e_log = -np.sum(np.log(prob_e_m[np.arange(doc_u.shape[0]), doc_e]))
        # perplexity of words given the emotions (the same prob_w applies at every position n)
        prob_w = probNormalize(
            np.tensordot(prob_w_kv,
                         np.multiply(prob_k, ppl_e_k_scaled),
                         axes=(0, 0)))
        ppl_w_log = -np.sum(np.log(prob_w[docToken]))
        # for both words & emoti
        try:
            ppl_log = -(np.log(
                np.inner(ppl_w_k_scaled, np.multiply(ppl_e_k_scaled, prob_k)))
                        + ppl_w_k_constant + ppl_e_k_constant)
        except FloatingPointError as e:
            raise e

        # debug output (enabled via display=True)
        if display:
            self._log("ppl_log_single_document_off_shell for doc %d" % d)
            self._log("docToken %s" % str(docToken))
            self._log("ppl_w_k_scaled %s" % str(ppl_w_k_scaled))
            self._log("ppl_e_k_scaled %s" % str(ppl_e_k_scaled))
            self._log("prob_e_m %s" % str(prob_e_m))
            self._log("prob_g_m %s" % str(self.GLV["psi"][doc_u, :]))

        return [ppl_w_log, ppl_e_log,
                ppl_log], docToken.shape[0], doc_u.shape[0]
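
Example #2 also returns the document's token count and reaction count alongside the three log terms. A hedged sketch of how a caller might aggregate these per-document returns into corpus-level perplexities follows; the aggregation used in the original project is not shown here, and the standard exp(total negative log-likelihood / total count) definition is assumed. The helper name corpus_perplexity is hypothetical.

import numpy as np


def corpus_perplexity(per_doc_results):
    # per_doc_results: iterable of ([ppl_w_log, ppl_e_log, ppl_log], Nd, Md)
    # tuples as returned by _ppl_log_single_document_off_shell.
    total_w_log = total_e_log = 0.0
    total_tokens = total_reactions = 0
    for (ppl_w_log, ppl_e_log, _), n_tokens, n_reactions in per_doc_results:
        total_w_log += ppl_w_log
        total_e_log += ppl_e_log
        total_tokens += n_tokens
        total_reactions += n_reactions
    # perplexity = exp(average negative log-likelihood per observation)
    return (np.exp(total_w_log / total_tokens),
            np.exp(total_e_log / total_reactions))
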
Example #3
    def _ppl_log_single_document_off_shell(self, docdata):
        d, docToken, [doc_u, doc_e] = docdata
        Nd = docToken.shape[0]
        prob_z_alpha = np.ones([self.K, Nd], dtype=np.float64)
        ppl_w_z = self.phi[:, docToken]
        prob_z = prob_z_alpha * ppl_w_z
        prob_z_sum = np.sum(
            prob_z, axis=1
        )  # the product over the Dirichlet is the same as the sum over the Dirichlet priors
        prob_e = probNormalize(np.dot(prob_z_sum, self.eta))
        ppl_e_log = -np.sum(np.log(prob_e)[doc_e])

        # empirical emotion distribution of the document, smoothed to avoid log(0)
        doc_E = np.sum(np.identity(self.E, dtype=np.float64)[:, doc_e], axis=1)
        docE = probNormalize(doc_E + SMOOTH_FACTOR)
        ppl_e_z_log = -(np.tensordot(np.log(docE), self.eta - 1.0, axes=(0, 1))
                        + np.log(self.eta_beta_inv))
        ppl_e_z_scaled, ppl_e_z_constant = expConstantIgnore(
            -ppl_e_z_log, constant_output=True)

        prob_w = probNormalize(np.dot(ppl_e_z_scaled, self.phi))
        ppl_w_log = -np.sum(np.log(prob_w)[docToken])

        ppl_log = np.nan  # joint word-emotion perplexity is not computed for this model
        return [ppl_w_log, ppl_e_log,
                ppl_log], docToken.shape[0], doc_e.shape[0]
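
In Example #3, doc_E is built by summing one-hot columns of an identity matrix, which is equivalent to counting how often each emotion label occurs. A minimal equivalent using np.bincount, assuming SMOOTH_FACTOR is a small smoothing constant defined elsewhere in the project (the value shown here is hypothetical):

import numpy as np

SMOOTH_FACTOR = 1e-10  # hypothetical value; the real constant is defined elsewhere


def smoothed_emotion_distribution(doc_e, E):
    # Count each emotion label, add a small smoothing constant so log() stays
    # finite, and normalize to a probability distribution (same result as
    # probNormalize(doc_E + SMOOTH_FACTOR) in the snippet above).
    doc_E = np.bincount(doc_e, minlength=E).astype(np.float64) + SMOOTH_FACTOR
    return doc_E / doc_E.sum()
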
Example #4
    def _predict_single_document_off_shell(self, docdata):
        d, docToken, [doc_u, doc_e] = docdata

        prob_w_kv = (self.GLV["phiT"] * self.GLV["pi"][1] +
                     self.GLV["phiB"] * self.GLV["pi"][0])
        ppl_w_k_log = -np.sum(np.log(prob_w_kv[:, docToken]), axis=1)
        ppl_w_k_scaled, ppl_w_k_constant = expConstantIgnore(
            -ppl_w_k_log, constant_output=True)
        prob_k = self.GLV["theta"]

        prob_e_mk = np.dot(self.GLV["psi"][doc_u, :], self.GLV["eta"])

        # predicted emotion distribution for each reaction, given the document's words
        prob_e_m = probNormalize(
            np.tensordot(prob_e_mk,
                         np.multiply(prob_k, ppl_w_k_scaled),
                         axes=(1, 0)))
        return prob_e_m.tolist(), doc_e.tolist()
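
Example #4 returns, per document, the predicted emotion distribution for each reaction together with the observed emotion labels. A hedged sketch of one way such (prediction, label) pairs could be scored, e.g. by top-1 accuracy, follows; this evaluation helper is hypothetical and not part of the original code.

import numpy as np


def top1_accuracy(prediction_pairs):
    # prediction_pairs: iterable of (prob_e_m, doc_e) as returned by
    # _predict_single_document_off_shell (per-reaction emotion distributions
    # and observed emotion ids).
    correct = total = 0
    for prob_e_m, doc_e in prediction_pairs:
        predicted = np.argmax(np.asarray(prob_e_m), axis=-1)
        correct += int(np.sum(predicted == np.asarray(doc_e)))
        total += len(doc_e)
    return correct / total if total else float("nan")
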
Example #5
    def _ppl_log_single_document_off_shell(self, docdata):
        # NOTE: potential underflow problem
        d, docToken, [doc_u, doc_e] = docdata
        prob_w_kv = self.GLV["phi"]
        ppl_w_k_log = -np.sum(np.log(prob_w_kv[:, docToken]), axis=1)
        ppl_w_k_scaled, ppl_w_k_constant = expConstantIgnore(
            -ppl_w_k_log, constant_output=True)  # (actual ppl^(-1))

        prob_e_mk = np.dot(
            self.GLV["psi"][doc_u, :],
            (np.tensordot(self.GLV["pi"], self.GLV["eta"], axes=(1, 0)) *
             self.GLV["c"][1] +
             np.tensordot(self.GLV["piB"], self.GLV["eta"], axes=(0, 0)) *
             self.GLV["c"][0]))
        ppl_e_k_log = -np.sum(
            np.log(prob_e_mk[np.arange(doc_u.shape[0]), :, doc_e]), axis=0)
        ppl_e_k_scaled, ppl_e_k_constant = expConstantIgnore(
            -ppl_e_k_log, constant_output=True)  # (actual ppl^(-1))
        prob_k = self.GLV["theta"]

        # perplexity of emotions given the words
        prob_e_m = probNormalize(
            np.tensordot(prob_e_mk,
                         np.multiply(prob_k, ppl_w_k_scaled),
                         axes=(1, 0)))
        ppl_e_log = -np.sum(np.log(prob_e_m[np.arange(doc_u.shape[0]), doc_e]))
        # perplexity of words given the emotions (the same prob_w applies at every position n)
        prob_w = probNormalize(
            np.tensordot(prob_w_kv,
                         np.multiply(prob_k, ppl_e_k_scaled),
                         axes=(0, 0)))
        ppl_w_log = -np.sum(np.log(prob_w[docToken]))
        # for both words & emoti
        ppl_log = -(np.log(
            np.inner(ppl_w_k_scaled, np.multiply(ppl_e_k_scaled, prob_k))) +
                    ppl_w_k_constant + ppl_e_k_constant)

        return [ppl_w_log, ppl_e_log,
                ppl_log], docToken.shape[0], doc_u.shape[0]
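
All five examples share the same underflow workaround: large negative per-topic log-likelihoods are rescaled by a constant before exponentiation, and the constants are added back when the final log is taken. A small self-contained check of that identity, reusing the expConstantIgnore sketch given after Example #1:

import numpy as np


def expConstantIgnore(x_log, constant_output=False):
    # same sketch as after Example #1
    c = np.max(x_log)
    scaled = np.exp(x_log - c)
    return (scaled, c) if constant_output else scaled


rng = np.random.default_rng(0)
loglik_w_k = -rng.uniform(500, 600, size=8)  # large negative log-likelihoods
loglik_e_k = -rng.uniform(500, 600, size=8)
prob_k = np.full(8, 1.0 / 8)

w_scaled, w_const = expConstantIgnore(loglik_w_k, constant_output=True)
e_scaled, e_const = expConstantIgnore(loglik_e_k, constant_output=True)

# stable combination, as in the snippets above: finite result
ppl_log = -(np.log(np.inner(w_scaled, e_scaled * prob_k)) + w_const + e_const)
# naive combination: the products underflow to 0, so its log would be -inf
naive = np.inner(np.exp(loglik_w_k), np.exp(loglik_e_k) * prob_k)
print(ppl_log, naive)  # finite value vs. 0.0
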