Example #1

    def get_gradient(self, word_in_topic_probs, topic_in_doc_probs):
        """
        Compute gradient of regularizer w.r.t. model parameters
        :param word_in_topic_probs: numpy.ndarray
        :param topic_in_doc_probs: numpy.ndarray
        :return: gradients
        """
        # E-step on the class-label counts: estimate class-in-topic and
        # topic-in-document frequencies under the current distributions.
        self._class_in_topics_probs[:, :], self._topic_in_doc_buffer[:, :], _, _ = \
            compute_frequencies(word_in_doc_freqs=self._class_in_docs,
                                word_in_topic_probs=self._class_in_topics_probs,
                                topic_in_doc_probs=topic_in_doc_probs,
                                word_in_topics_freqs_buffer=self._class_in_topics_buffer,
                                topic_in_doc_freqs_buffer=self._topic_in_doc_buffer)
        # Renormalize the class-in-topic frequencies into column-stochastic
        # probabilities.
        self._class_in_topics_probs /= self._class_in_topics_probs.sum(axis=0)
        # d/d(theta) of n * log(theta) is n / theta, so divide the estimated
        # topic-in-document frequencies by the current probabilities.
        self._topic_in_doc_buffer /= topic_in_doc_probs
        # This regularizer does not depend on the word-in-topic probabilities;
        # the corresponding buffer is expected to hold zeros.
        return self._word_in_topic_buffer, self._tau * self._topic_in_doc_buffer
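compute_frequencies itself is not shown in this listing. For orientation, here is a minimal hypothetical sketch of the PLSA E-step it is assumed to implement, with matrices laid out as in the examples (word_in_doc_freqs is words x documents, word_in_topic_probs is words x topics with columns summing to 1, topic_in_doc_probs is topics x documents with columns summing to 1); the internal names and the zero guard are illustrative, not the repo's actual implementation:

import numpy as np

def compute_frequencies(word_in_doc_freqs, word_in_topic_probs,
                        topic_in_doc_probs, word_in_topics_freqs_buffer,
                        topic_in_doc_freqs_buffer):
    # p(w | d) = sum_t phi_wt * theta_td, shape (n_words, n_docs).
    word_in_doc_probs = word_in_topic_probs @ topic_in_doc_probs
    # n_dw / p(w | d), guarded against division by zero.
    ratio = word_in_doc_freqs / np.maximum(word_in_doc_probs, 1e-30)
    # n_wt = sum_d n_dw * p(t | d, w) = phi_wt * sum_d (n_dw / p(w|d)) * theta_td
    word_in_topics_freqs_buffer[:, :] = word_in_topic_probs * (
        ratio @ topic_in_doc_probs.T)
    # n_td = sum_w n_dw * p(t | d, w) = theta_td * sum_w phi_wt * (n_dw / p(w|d))
    topic_in_doc_freqs_buffer[:, :] = topic_in_doc_probs * (
        word_in_topic_probs.T @ ratio)
    # Aggregate topic frequencies n_t and document lengths n_d.
    topic_freqs = topic_in_doc_freqs_buffer.sum(axis=1)
    doc_freqs = word_in_doc_freqs.sum(axis=0)
    return (word_in_topics_freqs_buffer, topic_in_doc_freqs_buffer,
            topic_freqs, doc_freqs)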
Example #2

    def get_gradient(self, word_in_topic_probs, topic_in_doc_probs):
        """
        Compute gradient of regularizer w.r.t. model parameters
        :param word_in_topic_probs: numpy.ndarray
        :param topic_in_doc_probs: numpy.ndarray
        :return: gradients
        """
        # E-step: only the aggregate topic frequencies n_t and document
        # lengths n_d are needed here.
        _, _, topic_freqs, doc_freqs = compute_frequencies(
            self._word_in_doc_freqs, word_in_topic_probs, topic_in_doc_probs,
            self._word_in_topic_buffer, self._topic_in_doc_buffer)

        # This regularizer does not depend on the word-in-topic probabilities.
        self._word_in_topic_buffer[:, :] = 0

        # Gradient w.r.t. theta_td: -tau * n_d / n_t, broadcast over the
        # (topics x documents) buffer.
        self._topic_in_doc_buffer[:, :] = -self._tau * doc_freqs.reshape(1, -1)
        self._topic_in_doc_buffer[:, :] /= topic_freqs.reshape(-1, 1)

        return self._word_in_topic_buffer, self._topic_in_doc_buffer
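Both _do_em_iteration variants below plug these gradients into the additive-regularization (ARTM-style) M-step that the code appears to implement: the E-step counts are perturbed by the weighted gradients of the combined regularizer R = \sum_i \tau_i R_i and then renormalized,

    \phi_{wt} \propto \Bigl( n_{wt} + \phi_{wt} \frac{\partial R}{\partial \phi_{wt}} \Bigr)_{+}, \qquad
    \theta_{td} \propto \Bigl( n_{td} + \theta_{td} \frac{\partial R}{\partial \theta_{td}} \Bigr)_{+},

where (x)_+ = \max(x, 0), \phi is normalized over words within each topic, and \theta over topics within each document.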
Example #3
    def _do_em_iteration(self, word_in_doc_freqs, word_in_topic_probs,
                         topic_in_doc_probs, word_in_topic_freqs_buffer,
                         topic_in_doc_freqs_buffer):

        # E-step: expected word-in-topic and topic-in-document counts under
        # the current parameters.
        word_in_topics_freqs, topic_in_doc_freqs, _, _ = compute_frequencies(
            word_in_doc_freqs, word_in_topic_probs, topic_in_doc_probs,
            word_in_topic_freqs_buffer, topic_in_doc_freqs_buffer)

        unnormalized_word_in_topic_probs = word_in_topics_freqs
        unnormalized_topic_in_doc_probs = topic_in_doc_freqs

        # M-step: add each regularizer's weighted contribution
        # weight * probs * dR/d(probs) to the expected counts. The in-place
        # *= operations reuse (and overwrite) the gradient buffers returned
        # by the regularizers.
        for regularizer, weight in zip(self._regularizers,
                                       self._regularizer_weights):

            word_in_topic_probs_grad, topic_in_doc_probs_grad = \
                    regularizer.get_gradient(word_in_topic_probs, topic_in_doc_probs)

            word_in_topic_freqs_addition = word_in_topic_probs_grad
            word_in_topic_freqs_addition *= word_in_topic_probs
            word_in_topic_freqs_addition *= weight

            unnormalized_word_in_topic_probs += word_in_topic_freqs_addition

            topic_in_doc_freqs_addition = topic_in_doc_probs_grad
            topic_in_doc_freqs_addition *= topic_in_doc_probs
            topic_in_doc_freqs_addition *= weight

            unnormalized_topic_in_doc_probs += topic_in_doc_freqs_addition

        if not self._fixed_word_in_topic_probs:

            # Floor at epsilon so a strongly negative regularizer term cannot
            # produce a nonpositive probability, then renormalize each
            # topic's column.
            np.clip(unnormalized_word_in_topic_probs,
                    self._epsilon,
                    None,
                    out=unnormalized_word_in_topic_probs)

            word_in_topic_probs[:, :] = unnormalized_word_in_topic_probs
            word_in_topic_probs /= unnormalized_word_in_topic_probs.sum(axis=0)

        # Same floor-and-normalize update for each document's column.
        np.clip(unnormalized_topic_in_doc_probs,
                self._epsilon,
                None,
                out=unnormalized_topic_in_doc_probs)

        topic_in_doc_probs[:, :] = unnormalized_topic_in_doc_probs
        topic_in_doc_probs /= unnormalized_topic_in_doc_probs.sum(axis=0)
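As a standalone illustration of the clip-then-normalize step above (the values here are made up): flooring at a small epsilon keeps a negative regularizer contribution from producing a negative probability, at the cost of never driving an entry exactly to zero.

import numpy as np

epsilon = 1e-10
unnormalized = np.array([[0.5], [-0.2], [0.7]])         # one toy column
np.clip(unnormalized, epsilon, None, out=unnormalized)  # floor at epsilon
probs = unnormalized / unnormalized.sum(axis=0)         # renormalize column
print(probs.ravel())  # approx. [0.4167, 0.0000, 0.5833]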
Example #4
    def _do_em_iteration(self, word_in_doc_freqs, word_in_topic_probs,
                         topic_in_doc_probs, word_in_topics_freqs_buffer,
                         topic_in_doc_freqs_buffer):

        # Normalization constants below this threshold are treated as zero.
        EPSILON = 1e-10

        # E-step: expected word-in-topic and topic-in-document counts under
        # the current parameters.
        word_in_topics_freqs, topic_in_doc_freqs, _, _ = compute_frequencies(
            word_in_doc_freqs, word_in_topic_probs, topic_in_doc_probs,
            word_in_topics_freqs_buffer, topic_in_doc_freqs_buffer)

        unnormalized_word_in_topic_probs = word_in_topics_freqs
        unnormalized_topic_in_doc_probs = topic_in_doc_freqs

        # M-step: add each regularizer's weighted contribution
        # weight * probs * dR/d(probs) to the expected counts. The in-place
        # *= operations reuse (and overwrite) the gradient buffers returned
        # by the regularizers.
        for regularizer, weight in zip(self._regularizers,
                                       self._regularizer_weights):

            word_in_topic_probs_grad, topic_in_doc_probs_grad = \
                    regularizer.get_gradient(word_in_topic_probs, topic_in_doc_probs)

            word_in_topic_freqs_addition = word_in_topic_probs_grad
            word_in_topic_freqs_addition *= word_in_topic_probs
            word_in_topic_freqs_addition *= weight

            unnormalized_word_in_topic_probs += word_in_topic_freqs_addition

            topic_in_doc_freqs_addition = topic_in_doc_probs_grad
            topic_in_doc_freqs_addition *= topic_in_doc_probs
            topic_in_doc_freqs_addition *= weight

            unnormalized_topic_in_doc_probs += topic_in_doc_freqs_addition

        # Clip negative entries to zero; unlike the previous example, a
        # column is allowed to lose all of its mass.
        np.clip(unnormalized_word_in_topic_probs, 0, None,
                out=unnormalized_word_in_topic_probs)
        np.clip(unnormalized_topic_in_doc_probs, 0, None,
                out=unnormalized_topic_in_doc_probs)

        # Normalize each topic's column; columns whose total mass is (near)
        # zero are set to zero instead of being divided by ~0.
        word_in_topic_prob_norm_consts = unnormalized_word_in_topic_probs.sum(
            axis=0)
        word_in_topic_prob_norm_const_is_not_small = (
            word_in_topic_prob_norm_consts > EPSILON)

        word_in_topic_probs[:, :] = unnormalized_word_in_topic_probs
        word_in_topic_probs[:, ~word_in_topic_prob_norm_const_is_not_small] = 0
        word_in_topic_probs[:, word_in_topic_prob_norm_const_is_not_small] /= \
                word_in_topic_prob_norm_consts[word_in_topic_prob_norm_const_is_not_small]

        # Same guarded normalization for each document's column.
        topic_in_doc_prob_norm_consts = unnormalized_topic_in_doc_probs.sum(
            axis=0)
        topic_in_doc_prob_norm_const_is_not_small = (
            topic_in_doc_prob_norm_consts > EPSILON)

        topic_in_doc_probs[:, :] = unnormalized_topic_in_doc_probs
        topic_in_doc_probs[:, ~topic_in_doc_prob_norm_const_is_not_small] = 0
        topic_in_doc_probs[:, topic_in_doc_prob_norm_const_is_not_small] /= \
                topic_in_doc_prob_norm_consts[topic_in_doc_prob_norm_const_is_not_small]
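
The guarded normalization at the end of this example can be isolated as a small standalone helper; this is a sketch of the same trick (the function name and test values are illustrative, not from the repo):

import numpy as np

def normalize_columns_safe(unnormalized, epsilon=1e-10):
    # Column-normalize; columns whose mass is below epsilon are zeroed
    # instead of being divided by a (near-)zero constant.
    norm_consts = unnormalized.sum(axis=0)
    is_not_small = norm_consts > epsilon
    result = np.zeros_like(unnormalized)
    result[:, is_not_small] = (unnormalized[:, is_not_small]
                               / norm_consts[is_not_small])
    return result

m = np.array([[0.2, 0.0],
              [0.3, 0.0]])
print(normalize_columns_safe(m))
# [[0.4 0. ]
#  [0.6 0. ]]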