def _partial_inference_y_z(self, docdata, max_iter_inner=50):
    d, docToken, [doc_u, doc_e] = docdata
    doc_z2 = probNormalize(self.GLV["theta2"])
    doc_z1 = probNormalize(self.GLV["theta1"])
    doc_z2_old = doc_z2.copy()
    doc_z1_old = doc_z1.copy()
    doc_y_old = np.zeros([self.Nd[d], 2])
    converge_flag = False
    for inner_iter in range(max_iter_inner):
        doc_y = self._partial_inference_y_update(
            doc_z2=doc_z2, doc_z1=doc_z1,
            pi_avg=self.GLV["pi"],
            phi2_avg=self.GLV["phi2"], phi1_avg=self.GLV["phi1"],
            docToken=docToken
        )
        doc_z2 = self._partial_inference_z_update(
            theta_avg=self.GLV["theta2"], doc_y_i=doc_y[:, 1],
            docToken=docToken, phi_avg=self.GLV["phi2"]
        )
        doc_z1 = self._partial_inference_z_update(
            theta_avg=self.GLV["theta1"], doc_y_i=doc_y[:, 0],
            docToken=docToken, phi_avg=self.GLV["phi1"]
        )
        doc_z2_old, doc_z1_old, doc_y_old, converge_flag, diff = \
            self._partial_inference_convergeCheck(
                doc_z2=doc_z2, doc_z1=doc_z1, doc_y=doc_y,
                doc_z2_old=doc_z2_old, doc_z1_old=doc_z1_old, doc_y_old=doc_y_old,
                doc_Nd=self.Nd[d]
            )
        if converge_flag:
            break
    if not converge_flag:
        warnings.warn("document %d not converged after %d iterations in partial inference"
                      % (d, max_iter_inner))
    return doc_z2, doc_z1, doc_y
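# probNormalize() is called throughout this section but defined elsewhere in the
# repo. A minimal sketch, assuming it rescales non-negative weights into a
# probability distribution along the last axis (consistent with every call site
# here; the project's actual helper may differ in details):
import numpy as np

def probNormalize(distributions):
    # divide by the last-axis sum so each slice becomes a probability vector
    return distributions / np.sum(distributions, axis=-1, keepdims=True)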
def _GibbsSamplingLocal(self, dataE, dataW, dataToken, epoch):
    """ Gibbs sampling of word-level topic """
    pbar = tqdm(range(self.D), total=self.D, desc='({0:^3})'.format(epoch))
    for d in pbar:  # sequential sampling over documents
        doc_Nd = self.Nd[d]
        docE = probNormalize(dataE[d] + SMOOTH_FACTOR)
        docToken = dataToken[d]
        for n in range(doc_Nd):
            w = docToken[n]
            w_z = self.z[d][n]
            ## sampling ##
            # leave-one-out statistics (these alias self.TI / self.TV, so the
            # decrements and increments below update the counts in place) #
            TI_no_dn, TV_no_dn = self.TI, self.TV
            TI_no_dn[d, w_z] -= 1
            TV_no_dn[w_z, w] -= 1
            # conditional probability #
            prob_pa = TI_no_dn[d] + self.alpha
            prob_pb = np.divide(TV_no_dn[:, w] + self.beta,
                                np.sum(TV_no_dn + self.beta, axis=1))
            prob_pc = np.multiply(self.eta_beta_inv,
                                  np.prod(np.power(docE, self.eta - 1), axis=1))
            prob_w_z = probNormalize(prob_pa * prob_pb * prob_pc)
            # draw the new topic assignment #
            w_z_new = multinomial(prob_w_z)
            # update #
            self.z[d][n] = w_z_new
            TI_no_dn[d, w_z_new] += 1
            TV_no_dn[w_z_new, w] += 1
    self.TI, self.TV = TI_no_dn, TV_no_dn  # no-op given the aliasing above; kept for clarity
def _ppl_log_single_document_off_shell(self, docdata):
    d, docToken, [doc_u, doc_e] = docdata
    Nd = docToken.shape[0]
    prob_z_alpha = np.ones([self.K, Nd], dtype=np.float64)
    ppl_w_z = self.phi[:, docToken]
    prob_z = prob_z_alpha * ppl_w_z
    prob_z_sum = np.sum(prob_z, axis=1)  # product over dirichlet is the same as sum over dirichlet priors
    prob_e = probNormalize(np.dot(prob_z_sum, self.eta))
    ppl_e_log = -np.sum(np.log(prob_e)[doc_e])
    doc_E = np.sum(np.identity(self.E, dtype=np.float64)[:, doc_e], axis=1)
    docE = probNormalize(doc_E + SMOOTH_FACTOR)
    ppl_e_z_log = -(np.tensordot(np.log(docE), self.eta - 1.0, axes=(0, 1))
                    + np.log(self.eta_beta_inv))
    ppl_e_z_scaled, ppl_e_z_constant = expConstantIgnore(-ppl_e_z_log, constant_output=True)
    prob_w = probNormalize(np.dot(ppl_e_z_scaled, self.phi))
    ppl_w_log = -np.sum(np.log(prob_w)[docToken])
    ppl_log = np.nan  # joint perplexity is not computed for this model
    return [ppl_w_log, ppl_e_log, ppl_log], docToken.shape[0], doc_e.shape[0]
def _initialize(self, dataE, dataW, dataToken):
    start = datetime.now()
    self.theta = probNormalize(np.random.random([self.D, self.K]))
    self.phi = probNormalize(np.random.random([self.K, self.V]))
    self.eta = np.random.random([self.K, self.E])
    self.z = []
    for d in range(self.D):
        z_dist = self.theta[d]
        Nd = self.Nd[d]
        self.z.append(multinomial(z_dist, Nd))
    self.eta_beta_inv = multivariateBeta_inv(self.eta)
    self.TI = np.zeros([self.D, self.K], dtype=np.int32)
    self.TV = np.zeros([self.K, self.V], dtype=np.int32)
    for d in range(self.D):
        docToken = dataToken[d]
        doc_z = self.z[d]
        for n in range(self.Nd[d]):
            w = docToken[n]
            w_z = doc_z[n]
            self.TI[d, w_z] += 1
            self.TV[w_z, w] += 1
    duration = datetime.now() - start
    print("_initialize() takes %fs" % duration.total_seconds())
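# multinomial() is likewise defined elsewhere in the repo. Judging from its call
# sites -- a single draw inside the Gibbs samplers, Nd draws at initialization --
# it returns categorical sample indices rather than count vectors. A hypothetical
# equivalent built on numpy:
import numpy as np

def multinomial(dist, size=None):
    # sample one index (size=None) or an array of indices from categorical `dist`
    if size is None:
        return np.random.choice(len(dist), p=dist)
    return np.random.choice(len(dist), size=size, p=dist).astype(np.int32)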
def _initialize(self, dataDUE, dataW, dataToken):
    start = datetime.now()
    print("start _initialize")
    self.z2 = probNormalize(np.random.random([self.D, self.K2]))
    self.z1 = probNormalize(np.random.random([self.D, self.K1]))
    # self.y = []
    # self.x = []
    # for d in range(self.D):
    #     self.y.append(probNormalize(np.random.random([self.Nd[d], 2])))
    #     self.x.append(probNormalize(np.random.random([self.Md[d], self.G])))
    self.theta2.initialize(shape=[self.K2], seed=self.alpha2)
    self.theta1.initialize(shape=[self.K1], seed=self.alpha1)
    self.pi.initialize(shape=[2], seed=self.delta)
    self.phi1.initialize(shape=[self.K1, self.V], seed=self.beta)
    self.phi2.initialize(shape=[self.K2, self.V], seed=self.beta)
    self.eta.initialize(shape=[self.K2, self.G, self.E], seed=self.gamma,
                        additional_noise_axis=1)
    self.eta_z.initialize(shape=[self.K1, self.E], seed=self.gamma_z)
    self.eta_u.initialize(shape=[self.U, self.E], seed=self.gamma_u)
    self.psi.initialize(shape=[self.U, self.G], seed=self.zeta)
    self.se.initialize(shape=[3], seed=self.omega)
    duration = (datetime.now() - start).total_seconds()
    print("_initialize takes %fs" % duration)
def _initialize(self, dataE, dataW, dataToken):
    start = datetime.now()
    self.theta = probNormalize(np.random.random([self.E, self.K]))
    self.phi = probNormalize(np.random.random([self.K, self.V]))
    self.esp = []
    self.z = []
    z_dist = np.sum(self.theta, axis=0) / self.E
    for d in range(self.D):
        Nd = self.Nd[d]
        gamma = dataE[d]
        self.esp.append(multinomial(gamma, Nd))
        self.z.append(multinomial(z_dist, Nd))
    self.TE = np.zeros([self.K, self.E], dtype=np.int32)
    self.TV = np.zeros([self.K, self.V], dtype=np.int32)
    for d in range(self.D):
        docToken = dataToken[d]
        doc_z = self.z[d]
        doc_esp = self.esp[d]
        for n in range(self.Nd[d]):
            w = docToken[n]
            w_z = doc_z[n]
            w_esp = doc_esp[n]
            self.TE[w_z, w_esp] += 1
            self.TV[w_z, w] += 1
    self.TI = np.sum(self.TV, axis=1)
    self.IE = np.sum(self.TE, axis=0)
    duration = datetime.now() - start
    print("_initialize() takes %fs" % duration.total_seconds())
def _initialize(self, dataDUE):
    start = datetime.now()
    self.theta = probNormalize(np.random.random([self.D, self.K]))
    self.phi = probNormalize(np.random.random([self.K, self.V]))
    self.eta = np.random.random([self.K, self.E])
    self.z = []
    for d in range(self.D):
        z_dist = self.theta[d]
        Nd = self.Nd[d]
        self.z.append(multinomial(z_dist, Nd))
    self.eta_beta_inv = multivariateBeta_inv(self.eta)
    self.TI = np.zeros([self.D, self.K], dtype=np.int32)
    self.TV = np.zeros([self.K, self.V], dtype=np.int32)
    self.dataE_smoothed = {}
    for docdata in dataDUE.generate():
        d, docToken, [doc_u, doc_e] = docdata
        doc_z = self.z[d]
        for n in range(self.Nd[d]):
            w = docToken[n]
            w_z = doc_z[n]
            self.TI[d, w_z] += 1
            self.TV[w_z, w] += 1
        doc_E = np.sum(np.identity(self.E, dtype=np.float64)[:, doc_e], axis=1)
        docE = probNormalize(doc_E + SMOOTH_FACTOR)
        self.dataE_smoothed[d] = docE
    duration = datetime.now() - start
    self._log("_initialize() takes %fs" % duration.total_seconds())
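# multivariateBeta_inv(eta) supplies the normalizing constants of the per-topic
# Dirichlet densities (used above as eta_beta_inv * prod(docE ** (eta - 1))).
# A sketch computing 1 / B(eta_k) row-wise in log space for stability, assuming
# eta has shape [K, E]:
import numpy as np
from scipy.special import gammaln

def multivariateBeta_inv(eta):
    # 1/B(eta_k) = Gamma(sum_e eta_ke) / prod_e Gamma(eta_ke), for each row k
    log_inv = gammaln(np.sum(eta, axis=1)) - np.sum(gammaln(eta), axis=1)
    return np.exp(log_inv)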
def _ppl_log_single_document(self, docdata):
    ### potential underflow problem
    d, docToken, [doc_u, doc_e] = docdata
    prob_w_kv = (self.phiT * self.pi[1] + self.phiB * self.pi[0])
    ppl_w_k_log = -np.sum(np.log(prob_w_kv[:, docToken]), axis=1)
    ppl_w_k_scaled, ppl_w_k_constant = expConstantIgnore(-ppl_w_k_log,
                                                         constant_output=True)  # (actual ppl^(-1))
    prob_e_mk = np.dot(self.psi[doc_u, :], self.eta)
    ppl_e_k_log = -np.sum(np.log(prob_e_mk[np.arange(doc_u.shape[0]), :, doc_e]), axis=0)
    ppl_e_k_scaled, ppl_e_k_constant = expConstantIgnore(-ppl_e_k_log,
                                                         constant_output=True)  # (actual ppl^(-1))
    prob_k = self.theta
    # emotion given words
    prob_e_m = probNormalize(np.tensordot(prob_e_mk,
                                          np.multiply(prob_k, ppl_w_k_scaled), axes=(1, 0)))
    ppl_e_log = -np.sum(np.log(prob_e_m[np.arange(doc_u.shape[0]), doc_e]))
    # words given emotion (the same prob_w applies to every position n)
    prob_w = probNormalize(np.tensordot(prob_w_kv,
                                        np.multiply(prob_k, ppl_e_k_scaled), axes=(0, 0)))
    ppl_w_log = -np.sum(np.log(prob_w[docToken]))
    # joint words & emotion (a FloatingPointError propagates here on underflow
    # when np.seterr is set to raise)
    ppl_log = -(np.log(np.inner(ppl_w_k_scaled, np.multiply(ppl_e_k_scaled, prob_k)))
                + ppl_w_k_constant + ppl_e_k_constant)
    return ppl_w_log, ppl_e_log, ppl_log
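# expConstantIgnore() is the underflow guard used by the perplexity methods: the
# callers add the returned constant back in log space, which is consistent with
# a max-subtraction (log-sum-exp style) implementation. A sketch under that
# assumption:
import numpy as np

def expConstantIgnore(log_values, constant_output=False):
    # subtract the max before exponentiating so the largest entry maps to 1.0
    constant = np.max(log_values)
    scaled = np.exp(log_values - constant)
    if constant_output:
        return scaled, constant
    return scaled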
def _estimateGlobal(self):
    """ point estimate (currently the mean) of global latent variables, stored in self.GLV """
    self.GLV["theta"] = probNormalize(self.theta.data)
    self.GLV["pi"] = probNormalize(self.pi.data)
    self.GLV["psi"] = probNormalize(self.psi.data)
    self.GLV["phi"] = probNormalize(self.phi.data)
    self.GLV["eta"] = probNormalize(self.eta.data)
def _fit_single_document(docdata, pars_topass, max_iter_inner=500):
    d, docToken, [doc_u, doc_e] = docdata
    doc_Nd = pars_topass["Nd"][d]
    doc_Md = pars_topass["Md"][d]
    # random initialization #
    doc_z2 = probNormalize(np.random.random([pars_topass["K2"]]))
    doc_z1 = probNormalize(np.random.random([pars_topass["K1"]]))
    doc_r = probNormalize(np.ones([doc_Md, 3], dtype=np.float64))
    # old values kept for convergence comparison #
    doc_x_old = np.zeros([doc_Md, pars_topass["G"]])
    doc_y_old = np.zeros([doc_Nd, 2])
    doc_r_old = doc_r.copy()
    doc_z2_old = doc_z2.copy()
    doc_z1_old = doc_z1.copy()
    converge_flag = False
    for inner_iter in range(max_iter_inner):
        doc_y = _fit_single_document_y_update(doc_z2=doc_z2, doc_z1=doc_z1,
                                              docToken=docToken, pars_topass=pars_topass)
        doc_x = _fit_single_document_x_update(doc_z2=doc_z2, doc_r=doc_r, doc_u=doc_u,
                                              doc_e=doc_e, pars_topass=pars_topass)
        doc_r = _fit_single_document_r_update(doc_z2=doc_z2, doc_z1=doc_z1, doc_x=doc_x,
                                              doc_u=doc_u, doc_e=doc_e,
                                              pars_topass=pars_topass)
        doc_z2 = _fit_single_document_z2_update(doc_x=doc_x, doc_y=doc_y, doc_r=doc_r,
                                                docToken=docToken, doc_e=doc_e,
                                                pars_topass=pars_topass)
        doc_z1 = _fit_single_document_z1_update(doc_y=doc_y, doc_r=doc_r,
                                                docToken=docToken, doc_e=doc_e,
                                                pars_topass=pars_topass)
        doc_x_old, doc_y_old, doc_r_old, doc_z2_old, doc_z1_old, converge_flag, diff = \
            _fit_single_document_convergeCheck(
                doc_x=doc_x, doc_y=doc_y, doc_r=doc_r, doc_z2=doc_z2, doc_z1=doc_z1,
                doc_x_old=doc_x_old, doc_y_old=doc_y_old, doc_r_old=doc_r_old,
                doc_z2_old=doc_z2_old, doc_z1_old=doc_z1_old, pars_topass=pars_topass
            )
        if converge_flag:
            break
    if not converge_flag:
        warnings.warn("document %d not converged after %d iterations" % (d, max_iter_inner))
    return _fit_single_document_return(d=d, doc_x=doc_x, doc_y=doc_y, doc_r=doc_r,
                                       doc_z2=doc_z2, doc_z1=doc_z1, docToken=docToken,
                                       doc_u=doc_u, doc_e=doc_e, pars_topass=pars_topass)
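# _fit_single_document_convergeCheck() is called above but not shown. A plausible
# sketch: compare each local variational parameter against its previous value and
# declare convergence when the mean absolute change falls below a threshold
# (CONVERGE_THRESHOLD is a hypothetical name; the repo's real check may weight
# the factors differently or use pars_topass for the tolerance):
import numpy as np

CONVERGE_THRESHOLD = 1e-4

def _fit_single_document_convergeCheck(doc_x, doc_y, doc_r, doc_z2, doc_z1,
                                       doc_x_old, doc_y_old, doc_r_old,
                                       doc_z2_old, doc_z1_old, pars_topass):
    pairs = [(doc_x, doc_x_old), (doc_y, doc_y_old), (doc_r, doc_r_old),
             (doc_z2, doc_z2_old), (doc_z1, doc_z1_old)]
    # mean absolute change across all local parameters
    diff = np.mean([np.mean(np.abs(new - old)) for new, old in pairs])
    converge_flag = diff < CONVERGE_THRESHOLD
    # the new values become the "old" values for the next inner iteration
    return doc_x, doc_y, doc_r, doc_z2, doc_z1, converge_flag, diff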
def _ppl_log_single_document_on_shell(self, docdata):
    d, docToken, [doc_u, doc_e] = docdata
    prob_w_kv = (self.GLV["phiT"] * self.GLV["pi"][1] + self.GLV["phiB"] * self.GLV["pi"][0])
    prob_w = probNormalize(np.tensordot(prob_w_kv, self.z[d], axes=(0, 0)))
    ppl_w_log = -np.sum(np.log(prob_w[docToken]))
    prob_e_mk = np.dot(self.GLV["psi"][doc_u, :], self.GLV["eta"])
    prob_e_m = probNormalize(np.tensordot(prob_e_mk, self.z[d], axes=(1, 0)))
    ppl_e_log = -np.sum(np.log(prob_e_m[np.arange(doc_u.shape[0]), doc_e]))
    ppl_log = ppl_w_log + ppl_e_log
    return [ppl_w_log, ppl_e_log, ppl_log], docToken.shape[0], doc_u.shape[0]
def _initialize(self, dataDUE):
    start = datetime.now()
    print("start _initialize")
    self.z = probNormalize(np.random.random([self.D, self.K]))
    self.f = probNormalize(np.random.random([self.D, self.A]))
    self.theta.initialize(shape=[self.K], seed=self.alpha)
    self.pi.initialize(shape=[self.K, self.A], seed=self.delta)
    self.phi.initialize(shape=[self.K, self.V], seed=self.beta)
    self.psi.initialize(shape=[self.U, self.G], seed=self.zeta)
    self.eta.initialize(shape=[self.A, self.G, self.E], seed=self.gamma)
    duration = (datetime.now() - start).total_seconds()
    print("_initialize takes %fs" % duration)
def _ppl_log_single_document_off_shell(self, docdata, display=False):
    ### potential underflow problem
    d, docToken, [doc_u, doc_e] = docdata
    prob_w_kv = (self.GLV["phiT"] * self.GLV["pi"][1] + self.GLV["phiB"] * self.GLV["pi"][0])
    ppl_w_k_log = -np.sum(np.log(prob_w_kv[:, docToken]), axis=1)
    ppl_w_k_scaled, ppl_w_k_constant = expConstantIgnore(-ppl_w_k_log,
                                                         constant_output=True)  # (actual ppl^(-1))
    prob_e_mk = np.dot(self.GLV["psi"][doc_u, :], self.GLV["eta"])
    ppl_e_k_log = -np.sum(np.log(prob_e_mk[np.arange(doc_u.shape[0]), :, doc_e]), axis=0)
    ppl_e_k_scaled, ppl_e_k_constant = expConstantIgnore(-ppl_e_k_log,
                                                         constant_output=True)  # (actual ppl^(-1))
    prob_k = self.GLV["theta"]
    # emotion given words
    prob_e_m = probNormalize(np.tensordot(prob_e_mk,
                                          np.multiply(prob_k, ppl_w_k_scaled), axes=(1, 0)))
    ppl_e_log = -np.sum(np.log(prob_e_m[np.arange(doc_u.shape[0]), doc_e]))
    # words given emotion (the same prob_w applies to every position n)
    prob_w = probNormalize(np.tensordot(prob_w_kv,
                                        np.multiply(prob_k, ppl_e_k_scaled), axes=(0, 0)))
    ppl_w_log = -np.sum(np.log(prob_w[docToken]))
    # joint words & emotion (a FloatingPointError propagates here on underflow
    # when np.seterr is set to raise)
    ppl_log = -(np.log(np.inner(ppl_w_k_scaled, np.multiply(ppl_e_k_scaled, prob_k)))
                + ppl_w_k_constant + ppl_e_k_constant)
    ### test ###
    if display:
        self._log("ppl_log_single_document_off_shell for doc %d" % d)
        self._log("docToken %s" % str(docToken))
        self._log("ppl_w_k_scaled %s" % str(ppl_w_k_scaled))
        self._log("ppl_e_k_scaled %s" % str(ppl_e_k_scaled))
        self._log("prob_e_m %s" % str(prob_e_m))
        self._log("prob_g_m %s" % str(self.GLV["psi"][doc_u, :]))
    return [ppl_w_log, ppl_e_log, ppl_log], docToken.shape[0], doc_u.shape[0]
def _estimateGlobal(self):
    """ point estimate (currently the mean) of global latent variables, stored in self.GLV """
    self.GLV["theta2"] = probNormalize(self.theta2.data)
    self.GLV["theta1"] = probNormalize(self.theta1.data)
    self.GLV["pi"] = probNormalize(self.pi.data)
    self.GLV["phi1"] = probNormalize(self.phi1.data)
    self.GLV["phi2"] = probNormalize(self.phi2.data)
    self.GLV["eta"] = probNormalize(self.eta.data)
    self.GLV["eta_z"] = probNormalize(self.eta_z.data)
    self.GLV["eta_u"] = probNormalize(self.eta_u.data)
    self.GLV["psi"] = probNormalize(self.psi.data)
    self.GLV["se"] = probNormalize(self.se.data)
def _predict_single_document_on_shell(self, docdata):
    d, docToken, [doc_u, doc_e] = docdata
    doc_z = np.array(self.z[d], dtype=np.int8)
    doc_eta = np.sum(self.eta[doc_z, :], axis=0)
    prob_e = probNormalize(doc_eta)
    prob_e_m = np.repeat(prob_e[np.newaxis, :], doc_u.shape[0], axis=0)
    return prob_e_m.tolist(), doc_e.tolist()
def _initialize(self, dataE, dataW, dataToken):
    start = datetime.now()
    self.theta = probNormalize(np.random.random([self.K]))
    self.pi = probNormalize(np.random.random([2]))
    self.eta = probNormalize(np.random.random([self.K, self.E]))
    self.phiB = probNormalize(np.random.random([self.V]))
    self.phiT = probNormalize(np.random.random([self.K, self.V]))
    self.z = np.zeros([self.D], dtype=np.int8)
    self.y = []
    for d in range(self.D):
        self.z[d] = multinomial(self.theta)
        Nd = self.Nd[d]
        self.y.append(multinomial(self.pi, Nd))
    duration = datetime.now() - start
    print("_initialize() takes %fs" % duration.total_seconds())
def _ppl(self, dataE, dataW, dataToken):
    Nd_total = sum(self.Nd)
    ppl_word = -np.sum(np.multiply(self.TV, np.log(self.phi))) / Nd_total
    ln_dirichlet = np.tensordot(np.log(probNormalize(dataE + SMOOTH_FACTOR)),
                                self.eta - 1, axes=(-1, -1)) + np.log(self.eta_beta_inv)
    ppl_emot = -np.sum(np.multiply(self.TI, ln_dirichlet)) / Nd_total
    return ppl_word, ppl_emot, ppl_word + ppl_emot, np.exp(ppl_emot + ppl_word)
def _predict_single_document_off_shell(self, docdata):
    d, docToken, [doc_u, doc_e] = docdata
    Nd = docToken.shape[0]
    prob_z_alpha = np.ones([self.K, Nd], dtype=np.float64)
    ppl_w_z = self.phi[:, docToken]
    prob_z = prob_z_alpha * ppl_w_z
    prob_z_sum = np.sum(prob_z, axis=1)  # product over dirichlet is the same as sum over dirichlet priors
    prob_e = probNormalize(np.dot(prob_z_sum, self.eta))
    prob_e_m = np.repeat(prob_e[np.newaxis, :], doc_u.shape[0], axis=0)
    return prob_e_m.tolist(), doc_e.tolist()
def _GibbsSamplingLocal(self, dataE, dataW, dataToken, epoch):
    """ Gibbs sampling of word-level emotion and topic """
    pbar = tqdm(range(self.D), total=self.D, desc='({0:^3})'.format(epoch))
    for d in pbar:  # sequential sampling over documents
        doc_Nd = self.Nd[d]
        docE = dataE[d]
        docToken = dataToken[d]
        for n in range(doc_Nd):
            w = docToken[n]
            w_z = self.z[d][n]
            w_esp = self.esp[d][n]
            ## sampling ##
            # leave-one-out statistics (these alias the global count arrays,
            # so the decrements and increments below update them in place) #
            TE_no_dn, TV_no_dn, TI_no_dn, IE_no_dn = self.TE, self.TV, self.TI, self.IE
            TE_no_dn[w_z, w_esp] -= 1
            TV_no_dn[w_z, w] -= 1
            TI_no_dn[w_z] -= 1
            IE_no_dn[w_esp] -= 1
            # conditional probability #
            prob_w_esp = np.divide(np.multiply(self.alpha + TE_no_dn[w_z], docE),
                                   self.K * self.alpha + IE_no_dn)
            prob_w_esp = probNormalize(prob_w_esp)
            prob_w_z = np.divide(np.multiply(self.alpha + TE_no_dn[:, w_esp],
                                             self.beta + TV_no_dn[:, w]),
                                 self.V * self.beta + TI_no_dn)
            prob_w_z = probNormalize(prob_w_z)
            # draw the new assignments #
            w_esp_new = multinomial(prob_w_esp)
            w_z_new = multinomial(prob_w_z)
            # update #
            self.z[d][n] = w_z_new
            self.esp[d][n] = w_esp_new
            TE_no_dn[w_z_new, w_esp_new] += 1
            TV_no_dn[w_z_new, w] += 1
            TI_no_dn[w_z_new] += 1
            IE_no_dn[w_esp_new] += 1
    self.TE, self.TV, self.TI, self.IE = TE_no_dn, TV_no_dn, TI_no_dn, IE_no_dn  # no-op given the aliasing above
def _etaUpdate(self, dataE):
    """
    standard MLE estimation of eta for a Dirichlet distribution;
    the observations are the smoothed dataE rows, each repeated once
    per word assigned to the corresponding word-level topic
    """
    dataE_smoothed = probNormalize(dataE + SMOOTH_FACTOR)
    eta_est = np.zeros([self.K, self.E])
    for k in range(self.K):
        # weight each document's emotion distribution by its topic-k word count
        obs = np.repeat(dataE_smoothed, self.TI[:, k].tolist(), axis=0)
        eta_est[k] = dirichlet.mle(obs)
    return eta_est
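# dirichlet.mle() above appears to come from the `dirichlet` PyPI package, whose
# mle() estimates Dirichlet parameters by maximum likelihood from rows of
# probability vectors. A quick sanity check under that assumption (hypothetical
# usage, not part of the original repo):
import numpy as np
import dirichlet

obs = np.random.dirichlet([2.0, 5.0, 1.0], size=2000)  # synthetic Dirichlet samples
print(dirichlet.mle(obs))  # should recover parameters roughly near [2, 5, 1]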
def _GibbsSamplingLocal(self, dataDUE, epoch):
    """ Gibbs sampling of word-level topic """
    pbar = tqdm(dataDUE.generate(), total=self.D_train, desc='({0:^3})'.format(epoch))
    for docdata in pbar:  # sequential sampling over documents
        d, docToken, [doc_u, doc_e] = docdata
        doc_Nd = self.Nd[d]
        if d in self.dataE_smoothed:
            docE = self.dataE_smoothed[d]
        else:
            doc_E = np.sum(np.identity(self.E, dtype=np.float64)[:, doc_e], axis=1)
            docE = probNormalize(doc_E + SMOOTH_FACTOR)
            self.dataE_smoothed[d] = docE
        for n in range(doc_Nd):
            w = docToken[n]
            w_z = self.z[d][n]
            ## sampling ##
            # leave-one-out statistics (these alias self.TI / self.TV, so the
            # decrements and increments below update the counts in place) #
            TI_no_dn, TV_no_dn = self.TI, self.TV
            TI_no_dn[d, w_z] -= 1
            TV_no_dn[w_z, w] -= 1
            # conditional probability #
            prob_pa = TI_no_dn[d] + self.alpha
            prob_pb = np.divide(TV_no_dn[:, w] + self.beta,
                                np.sum(TV_no_dn + self.beta, axis=1))
            prob_pc = np.multiply(self.eta_beta_inv,
                                  np.prod(np.power(docE, self.eta - 1), axis=1))
            prob_w_z = probNormalize(prob_pa * prob_pb * prob_pc)
            # draw the new topic assignment #
            w_z_new = multinomial(prob_w_z)
            # update #
            self.z[d][n] = w_z_new
            TI_no_dn[d, w_z_new] += 1
            TV_no_dn[w_z_new, w] += 1
    self.TI, self.TV = TI_no_dn, TV_no_dn  # no-op given the aliasing above; kept for clarity
def _predict_single_document_off_shell(self, docdata):
    d, docToken, [doc_u, doc_e] = docdata
    prob_w_kv = (self.GLV["phiT"] * self.GLV["pi"][1] + self.GLV["phiB"] * self.GLV["pi"][0])
    ppl_w_k_log = -np.sum(np.log(prob_w_kv[:, docToken]), axis=1)
    ppl_w_k_scaled, ppl_w_k_constant = expConstantIgnore(-ppl_w_k_log, constant_output=True)
    prob_k = self.GLV["theta"]
    prob_e_mk = np.dot(self.GLV["psi"][doc_u, :], self.GLV["eta"])
    prob_e_m = probNormalize(np.tensordot(prob_e_mk,
                                          np.multiply(prob_k, ppl_w_k_scaled), axes=(1, 0)))
    return prob_e_m.tolist(), doc_e.tolist()
def _ppl_log_single_document_on_shell(self, docdata):
    d, docToken, [doc_u, doc_e] = docdata
    doc_z = np.array(self.z[d], dtype=np.int8)
    doc_eta = np.sum(self.eta[doc_z, :], axis=0)
    prob_e = probNormalize(doc_eta)
    ppl_e_log = -np.sum(np.log(prob_e)[doc_e])
    ppl_w_log = -np.sum(np.log(self.phi)[doc_z, docToken])
    ppl_log = ppl_e_log + ppl_w_log
    return [ppl_w_log, ppl_e_log, ppl_log], docToken.shape[0], doc_e.shape[0]
def _initialize(self, dataDUE, dataW, dataToken):
    start = datetime.now()
    self.theta = probNormalize(np.random.random([self.K]))
    self.pi = probNormalize(np.random.random([2]))
    self.eta = probNormalize(np.random.random([self.K, self.G, self.E]) + 0.1)
    self.phiB = probNormalize(np.random.random([self.V]))
    self.phiT = probNormalize(np.random.random([self.K, self.V]))
    self.psi = probNormalize(np.random.random([self.U, self.G]))
    self.z = np.zeros([self.D], dtype=np.int8)
    self.y = []
    self.x = []
    for d in range(self.D):
        self.z[d] = multinomial(self.theta)
        self.y.append(multinomial(self.pi, self.Nd[d]))
        ## time consuming, replaced with below ##
        # doc_x = []
        # for m in range(self.Md[d]):
        #     u = np.random.randint(0, self.U)
        #     doc_x.append(multinomial(self.psi[u]))
        # self.x.append(np.array(doc_x, dtype=np.int8))
        self.x.append(multinomial(self.psi[0], self.Md[d]))
    duration = datetime.now() - start
    self._log("_initialize() takes %fs" % duration.total_seconds())
def _estimateGlobal(self, dataDUE=None):
    self.theta = probNormalize(self.alpha + self.TI)
    self.pi = probNormalize(self.delta + self.YI)
    self.phiB = probNormalize(self.Y0V + self.beta)
    self.phiT = probNormalize(self.Y1TV + self.beta)
    self.eta = probNormalize(self.TXE + self.gamma)
    self.psi = probNormalize(self.UX + self.zeta)
def _ppl_log_single_document_off_shell(self, docdata):
    ### potential underflow problem
    d, docToken, [doc_u, doc_e] = docdata
    prob_w_kv = self.GLV["phi"]
    ppl_w_k_log = -np.sum(np.log(prob_w_kv[:, docToken]), axis=1)
    ppl_w_k_scaled, ppl_w_k_constant = expConstantIgnore(-ppl_w_k_log,
                                                         constant_output=True)  # (actual ppl^(-1))
    prob_e_mk = np.dot(self.GLV["psi"][doc_u, :],
                       (np.tensordot(self.GLV["pi"], self.GLV["eta"], axes=(1, 0)) * self.GLV["c"][1]
                        + np.tensordot(self.GLV["piB"], self.GLV["eta"], axes=(0, 0)) * self.GLV["c"][0]))
    ppl_e_k_log = -np.sum(np.log(prob_e_mk[np.arange(doc_u.shape[0]), :, doc_e]), axis=0)
    ppl_e_k_scaled, ppl_e_k_constant = expConstantIgnore(-ppl_e_k_log,
                                                         constant_output=True)  # (actual ppl^(-1))
    prob_k = self.GLV["theta"]
    # emotion given words
    prob_e_m = probNormalize(np.tensordot(prob_e_mk,
                                          np.multiply(prob_k, ppl_w_k_scaled), axes=(1, 0)))
    ppl_e_log = -np.sum(np.log(prob_e_m[np.arange(doc_u.shape[0]), doc_e]))
    # words given emotion (the same prob_w applies to every position n)
    prob_w = probNormalize(np.tensordot(prob_w_kv,
                                        np.multiply(prob_k, ppl_e_k_scaled), axes=(0, 0)))
    ppl_w_log = -np.sum(np.log(prob_w[docToken]))
    # joint words & emotion
    ppl_log = -(np.log(np.inner(ppl_w_k_scaled, np.multiply(ppl_e_k_scaled, prob_k)))
                + ppl_w_k_constant + ppl_e_k_constant)
    return [ppl_w_log, ppl_e_log, ppl_log], docToken.shape[0], doc_u.shape[0]
def _predict_single_document_core(self, doc_z2, doc_z1, docdata):
    d, docToken, [doc_u, doc_e] = docdata
    prob_e_r0 = self.GLV["eta_u"][doc_u, :]
    prob_e_r1 = np.dot(doc_z1, self.GLV["eta_z"])
    prob_e_r2 = np.tensordot(doc_z2,
                             np.tensordot(self.GLV["psi"][doc_u, :], self.GLV["eta"],
                                          axes=(1, 1)),
                             axes=(0, 1))
    prob_e = (self.GLV["se"][0] * prob_e_r0
              + self.GLV["se"][1] * prob_e_r1
              + self.GLV["se"][2] * prob_e_r2)
    prob_e = probNormalize(prob_e)
    return prob_e.tolist(), doc_e.tolist()
def _initialize(self, dataDUE, dataW, dataToken):
    start = datetime.now()
    print("start _initialize")
    self.z = probNormalize(np.random.random([self.D, self.K]))
    # self.y = []
    # self.x = []
    # for d in range(self.D):
    #     self.y.append(probNormalize(np.random.random([self.Nd[d], 2])))
    #     self.x.append(probNormalize(np.random.random([self.Md[d], self.G])))
    self.theta.initialize(shape=[self.K], seed=self.alpha)
    self.pi.initialize(shape=[2], seed=self.delta)
    self.phiB.initialize(shape=[self.V], seed=self.beta)
    self.phiT.initialize(shape=[self.K, self.V], seed=self.beta)
    self.psi.initialize(shape=[self.U, self.G], seed=self.zeta)
    self.eta.initialize(shape=[self.K, self.G, self.E], seed=self.gamma,
                        additional_noise_axis=1)
    duration = (datetime.now() - start).total_seconds()
    print("_initialize takes %fs" % duration)
def _predict_single_document_on_shell(self, docdata):
    d, docToken, [doc_u, doc_e] = docdata
    prob_e_mk = np.dot(self.GLV["psi"][doc_u, :], self.GLV["eta"])
    prob_e_m = probNormalize(np.tensordot(prob_e_mk, self.z[d], axes=(1, 0)))
    return prob_e_m.tolist(), doc_e.tolist()
def _ppl(self, dataE, dataW, dataToken):
    prob_dw = probNormalize(np.tensordot(np.tensordot(dataE, self.theta, axes=(-1, 0)),
                                         self.phi, axes=(-1, 0)))
    ppl = -np.sum(dataW.multiply(np.log(prob_dw))) / sum(self.Nd)
    return ppl, np.exp(ppl)