示例#1
0
 def test_topics(self):
     """Check ``self.user.topics`` against a straightforward recomputation.

     Interaction weights reported by ``self._interactions_weights`` are summed
     per topic (starting from zero for "Health" and "Sports"), each sum is
     passed through ``utils.atan_norm``, and the result must equal the
     user's ``topics``.
     """
     raw_scores = {"Health": 0, "Sports": 0}
     for interaction in self.postinteractions_set:
         weighted_pairs = self._interactions_weights(interaction)
         for name, amount in weighted_pairs:
             raw_scores[name] += amount

     # Normalize each accumulated total the same way the code under test does.
     expected = {}
     for name, total in raw_scores.items():
         expected[name] = utils.atan_norm(total)

     self.assertEqual(self.user.topics, expected)
示例#2
0
 def test_classify(self, _s3_mock):
     """Classify a sample sentence with weights loaded via the classifier module.

     The weights come from ``classifier.SimpleWeights.load`` fed by
     ``classifier.s3_key_xreadlines()``; ``_s3_mock`` is unused here
     (presumably injected by a patch decorator outside this excerpt -- confirm).
     """
     loaded = classifier.SimpleWeights.load(classifier.s3_key_xreadlines())
     sentence = ("About 75% of New York City School students qualify "
                 "for free or reduced-price lunch.")
     observed = loaded.classify(sentence)
     expected = {
         'Healthcare': 0,
         'Education': atan_norm(0.6 + 0.9),  # students, school
     }
     self.assertEqual(observed, expected)
示例#3
0
    def classify(self, corpus, *topics):
        """Return a mapping of topic to normalized score for `corpus`.

        Scoring is based on the number of occurrences of words and phrases, and
        their weights, in the SimpleWeights dictionary. The raw per-topic scores
        are produced by `iter_topics`; each one is passed through `atan_norm`
        before being returned.

        With no `topics` given, `corpus` is classified for every topic that has
        weights. To restrict the classification, name the topics as extra
        positional arguments:

            SIMPLE_WEIGHTS.classify(corpus, 'healthcare', 'cooking', ...)

        """
        normalized = {}
        for name, raw_score in self.iter_topics(corpus, *topics):
            normalized[name] = atan_norm(raw_score)
        return normalized
示例#4
0
文件: user.py 项目: edgeflip/edgeflip
    def get_topics(post_interactions, post_topics):
        """Score a User's interests by topic.

        `post_interactions` is an iterable of the user's PostInteractions and
        `post_topics` is a catalog of PostTopics looked up by `postid`.
        Interactions whose post is missing from the catalog contribute nothing.

        Returns a dict mapping each topic to its accumulated score after
        `atan_norm` has been applied.

        """
        totals = collections.defaultdict(int)
        for entry in post_interactions:
            try:
                post_record = post_topics[entry.postid]
            except KeyError:
                # Post was never catalogued, so it has no topics to score.
                continue

            for (name, strength) in post_record.document.items():
                # Every interaction type currently carries the same weight, so
                # only the counts matter.
                for (_kind, tally) in entry.document.items():
                    totals[name] += strength * tally

        # Normalize the accumulated topic scores.
        normalized = {}
        for (name, total) in totals.items():
            normalized[name] = atan_norm(total)
        return normalized