Exemplo n.º 1
0
    def process(self, payload, distribution, topic_id):
        """Aggregate, normalise and rank the distribution of a single topic.

        A debug line with the topic id, the entropy of the aggregated
        distribution and the per-term entropies is written to
        ``self.f_debug_out``; the ranking is then passed to
        ``self.rank_callback``.

        Args:
            payload: Iterable of keys into ``self.tokens``, one per term.
            distribution: Two-dimensional array (this is asserted by
                ``compute_normalised_entropy``) that is collapsed along
                axis 0 with ``mode='product'``.
                NOTE(review): presumably one row per entry of ``payload``
                -- confirm against the caller.
            topic_id: Identifier used in the debug line, in the error log
                message and as first argument of ``self.rank_callback``.

        Returns:
            None. Results are delivered through ``self.rank_callback``.
        """
        # A comprehension avoids the lambda whose parameter shadowed the
        # builtin ``id``.
        terms = [self.tokens[token_id] for token_id in payload]
        term_entropies = compute_normalised_entropy(
            distribution, base=2)

        distribution = inference.aggregate_distribution(
            distribution, mode='product', axis=0)

        assert distribution.ndim == 1

        # In-place rescaling so that the aggregated values sum to one.
        distribution /= distribution.sum()

        # The division can still leave a sum that is not ~1 (presumably
        # when the aggregated mass is zero or not finite); that case is
        # only logged and processing continues.
        if not np.isclose(distribution.sum(), 1.0):
            logging.error('Encountered non-normalized '
                          'distribution for topic "%s" '
                          '(mass=%.10f).',
                          topic_id, distribution.sum())

        # ``zip`` is a lazy iterator on Python 3; formatting it directly
        # would print "<zip object at 0x...>" instead of the pairs, so it
        # is materialised first (the output on Python 2 is unchanged).
        self.f_debug_out.write('Topic {0} {1}: {2}\n'.format(
            topic_id,
            math_utils.entropy(
                distribution, base=2, normalize=True),
            list(zip(terms, term_entropies))))

        # argsort is ascending; reversing it puts the largest values first.
        ranked_indices = np.argsort(distribution)
        top_ranked_indices = ranked_indices[::-1]

        top_ranked_values = distribution[top_ranked_indices]

        self.rank_callback(topic_id, top_ranked_indices, top_ranked_values)
Exemplo n.º 2
0
    def process(self, payload, distribution, topic_id):
        """Rank the entries of one topic's aggregated distribution.

        Writes a debug line (topic id, entropy of the aggregated
        distribution, per-term entropies) to ``self.f_debug_out`` and hands
        the descending ranking to ``self.rank_callback``.

        Args:
            payload: Iterable of keys into ``self.tokens``, one per term.
            distribution: Two-dimensional array (asserted by
                ``compute_normalised_entropy``); it is reduced along axis 0
                with ``mode='product'``.
                NOTE(review): presumably one row per entry of ``payload``
                -- confirm against the caller.
            topic_id: Identifier used for the debug line, the error log
                message and the callback.

        Returns:
            None. The ranking is reported via ``self.rank_callback``.
        """
        # Comprehension instead of ``map`` + lambda: the lambda parameter
        # shadowed the builtin ``id``.
        terms = [self.tokens[token_id] for token_id in payload]
        term_entropies = compute_normalised_entropy(distribution, base=2)

        distribution = inference.aggregate_distribution(distribution,
                                                        mode='product',
                                                        axis=0)

        assert distribution.ndim == 1

        # Rescale in place so that the aggregated values sum to one.
        distribution /= distribution.sum()

        # A sum that is still not ~1 (presumably a zero or non-finite
        # aggregated mass) is logged; processing continues regardless.
        if not np.isclose(distribution.sum(), 1.0):
            logging.error(
                'Encountered non-normalized '
                'distribution for topic "%s" '
                '(mass=%.10f).', topic_id, distribution.sum())

        # Materialise the pairs before formatting: on Python 3 a bare
        # ``zip`` would be rendered as "<zip object at 0x...>".
        term_entropy_pairs = list(zip(terms, term_entropies))

        self.f_debug_out.write('Topic {0} {1}: {2}\n'.format(
            topic_id, math_utils.entropy(distribution, base=2, normalize=True),
            term_entropy_pairs))

        # Ascending argsort, reversed so the largest values come first.
        ranked_indices = np.argsort(distribution)
        top_ranked_indices = ranked_indices[::-1]

        top_ranked_values = distribution[top_ranked_indices]

        self.rank_callback(topic_id, top_ranked_indices, top_ranked_values)
Exemplo n.º 3
0
def compute_normalised_entropy(distribution, base=2):
    """Compute one entropy value per row of a row-normalised 2-D array.

    Args:
        distribution: Two-dimensional array in which every row sums to
            (approximately) one; both properties are asserted.
        base: Forwarded as ``base`` to ``math_utils.entropy``.

    Returns:
        A list with the result of
        ``math_utils.entropy(row, base=base, normalize=True)`` for every
        row, in row order.

    Raises:
        AssertionError: If ``distribution`` is not two-dimensional or a
            row does not sum to one.
    """
    assert distribution.ndim == 2

    row_masses = distribution.sum(axis=1)
    assert np.allclose(row_masses, 1.0)

    row_entropies = []
    for row_index in range(distribution.shape[0]):
        row = distribution[row_index, :]
        row_entropies.append(
            math_utils.entropy(row, base=base, normalize=True))

    return row_entropies
Exemplo n.º 4
0
def compute_normalised_entropy(distribution, base=2):
    """Return the entropy of each row of a row-normalised 2-D array.

    Args:
        distribution: Two-dimensional array whose rows each sum to
            (approximately) one; both conditions are checked with asserts.
        base: Passed through as ``base`` to ``math_utils.entropy``.

    Returns:
        A list holding, in row order, the value of
        ``math_utils.entropy(row, base=base, normalize=True)`` for each row.

    Raises:
        AssertionError: If the input is not two-dimensional or any row
            fails the sum-to-one check.
    """
    is_matrix = distribution.ndim == 2
    assert is_matrix

    rows_sum_to_one = np.allclose(distribution.sum(axis=1), 1.0)
    assert rows_sum_to_one

    def entropy_of_row(row_index):
        # Entropy of a single row, using the caller's base.
        return math_utils.entropy(
            distribution[row_index, :], base=base, normalize=True)

    num_rows = distribution.shape[0]

    return [entropy_of_row(row_index) for row_index in range(num_rows)]