Exemplo n.º 1
0
    def _initialize(self, dataDUE):
        """Randomly initialize parameters, assignments and count tables.

        Sets ``theta`` (D x K) and ``phi`` (K x V) to normalized random
        matrices and ``eta`` (K x E) to an un-normalized random matrix, draws
        one assignment vector per document with ``multinomial``, and then
        walks ``dataDUE.generate()`` once to fill the count tables ``TI``
        (D x K) and ``TV`` (K x V) and to cache a smoothed, normalized
        length-E vector per document in ``dataE_smoothed``.

        :param dataDUE: object whose ``generate()`` yields
            ``(d, docToken, [doc_u, doc_e])`` triples, one per document.
        """
        began = datetime.now()

        # Random starting point for the model parameters.
        self.theta = probNormalize(np.random.random([self.D, self.K]))
        self.phi = probNormalize(np.random.random([self.K, self.V]))
        self.eta = np.random.random([self.K, self.E])
        # One draw per document from that document's row of theta;
        # entries are later used as column indices into TI.
        self.z = [
            multinomial(self.theta[doc_id], self.Nd[doc_id])
            for doc_id in range(self.D)
        ]
        self.eta_beta_inv = multivariateBeta_inv(self.eta)

        # Count tables, filled from the sampled assignments below.
        self.TI = np.zeros([self.D, self.K], dtype=np.int32)
        self.TV = np.zeros([self.K, self.V], dtype=np.int32)
        self.dataE_smoothed = {}
        for doc_id, tokens, (_doc_u, doc_e) in dataDUE.generate():
            assignments = self.z[doc_id]
            n_tokens = self.Nd[doc_id]
            for pos in range(n_tokens):
                word = tokens[pos]
                topic = assignments[pos]
                self.TI[doc_id, topic] += 1
                self.TV[topic, word] += 1
            # Selecting identity columns by doc_e and summing across them
            # yields, for each of the E indices, how often it occurs in doc_e.
            one_hot = np.identity(self.E, dtype=np.float64)
            e_counts = np.sum(one_hot[:, doc_e], axis=1)
            self.dataE_smoothed[doc_id] = probNormalize(e_counts + SMOOTH_FACTOR)

        elapsed = (datetime.now() - began).total_seconds()
        self._log("_initialize() takes %fs" % elapsed)
Exemplo n.º 2
0
    def _initialize(self, dataE, dataW, dataToken):
        """Randomly initialize parameters, assignments and count tables.

        Sets ``theta`` (D x K) and ``phi`` (K x V) to normalized random
        matrices and ``eta`` (K x E) to an un-normalized random matrix, draws
        one assignment vector per document with ``multinomial``, and fills
        the count tables ``TI`` (D x K) and ``TV`` (K x V) from those draws.

        :param dataE: not read by this method.
        :param dataW: not read by this method.
        :param dataToken: indexable by document id; ``dataToken[d][n]`` is
            used as a column index into ``TV`` for the n-th token.
        """
        began = datetime.now()

        # Random starting point for the model parameters.
        self.theta = probNormalize(np.random.random([self.D, self.K]))
        self.phi = probNormalize(np.random.random([self.K, self.V]))
        self.eta = np.random.random([self.K, self.E])
        # One draw per document from that document's row of theta;
        # entries are later used as column indices into TI.
        self.z = [
            multinomial(self.theta[doc_id], self.Nd[doc_id])
            for doc_id in range(self.D)
        ]
        self.eta_beta_inv = multivariateBeta_inv(self.eta)

        # Count tables, filled from the sampled assignments below.
        self.TI = np.zeros([self.D, self.K], dtype=np.int32)
        self.TV = np.zeros([self.K, self.V], dtype=np.int32)
        for doc_id in range(self.D):
            tokens = dataToken[doc_id]
            assignments = self.z[doc_id]
            n_tokens = self.Nd[doc_id]
            for pos in range(n_tokens):
                word = tokens[pos]
                topic = assignments[pos]
                self.TI[doc_id, topic] += 1
                self.TV[topic, word] += 1

        elapsed = (datetime.now() - began).total_seconds()
        print("_initialize() takes %fs" % elapsed)
Exemplo n.º 3
0
 def _restoreCheckPoint(self, filename=None):
     """Restore model state from a pickled checkpoint file.

     Loads the pickled ``state`` dict and copies its ``theta``, ``phi``,
     ``eta``, ``alpha``, ``beta``, ``z``, ``TV`` and ``TI`` entries onto the
     instance, recomputes ``eta_beta_inv`` from the restored ``eta``, and
     prints the checkpoint's epoch and perplexity.

     :param filename: path of the checkpoint file; when ``None`` the
         instance's ``checkpoint_file`` is used.
     :raises KeyError: if the loaded state lacks one of the expected keys.
     """
     if filename is None:
         filename = self.checkpoint_file
     # SECURITY: unpickling can execute arbitrary code -- only load
     # checkpoint files that come from a trusted source.
     # NOTE(review): mode "r" is kept from the original; if checkpoints are
     # written with a binary pickle protocol, both sides should use "wb"/"rb".
     # The context manager closes the handle even when loading fails.
     with open(filename, "r") as checkpoint:
         state = cPickle.load(checkpoint)
     # restore #
     self.theta = state["theta"]
     self.phi = state["phi"]
     self.eta = state["eta"]
     # eta_beta_inv is derived from eta, so it is recomputed, not stored.
     self.eta_beta_inv = multivariateBeta_inv(self.eta)
     self.alpha = state["alpha"]
     self.beta = state["beta"]
     self.z = state["z"]
     self.TV = state["TV"]
     self.TI = state["TI"]
     epoch = state["epoch"]
     ppl = state["ppl"]
     print("restore state from file '%s' on epoch %d with ppl: %s" % (filename, epoch, str(ppl)))
Exemplo n.º 4
0
 def _estimateGlobal(self):
     """Re-estimate ``theta``, ``phi`` and ``eta`` from the current state.

     ``theta`` is the normalized ``TI + alpha``, ``phi`` the normalized
     ``TV + beta``; ``eta`` comes from ``_etaUpdate()`` and ``eta_beta_inv``
     is then recomputed from the new ``eta``.
     """
     # Count tables offset by alpha / beta before normalization.
     offset_TI = self.TI + self.alpha
     self.theta = probNormalize(offset_TI)
     offset_TV = self.TV + self.beta
     self.phi = probNormalize(offset_TV)
     # eta is updated after theta/phi; its derived quantity follows it.
     refreshed_eta = self._etaUpdate()
     self.eta = refreshed_eta
     self.eta_beta_inv = multivariateBeta_inv(refreshed_eta)