예제 #1
0
 def add_singleton_cluster(self, storage, site_summary, opinion):
     """Persist a cluster of size one built from a single opinion.

     The cluster is stored three steps deep: first on its own with no
     primary comment, then the one comment that points at it, and finally
     the cluster again with that comment attached as its primary comment.
     Ids for both objects are drawn from ``self.cluster_id`` and
     ``self.comment_id``; every write goes through ``storage.save``.
     """
     # Step 1: the bare cluster, primary comment still unset.
     cluster_pk = self.cluster_id.next()
     singleton = Cluster(
         pk=cluster_pk,
         site_summary=site_summary,
         primary_description=opinion.description,
         primary_comment=None,
         positive=opinion.positive,
         size=1,
     )
     storage.save(singleton)

     # Step 2: the only comment, linked to the cluster saved above.
     comment_pk = self.comment_id.next()
     only_comment = Comment(
         pk=comment_pk,
         description=opinion.description,
         opinion_id=opinion.id,
         cluster=singleton,
         score=1.0,
     )
     storage.save(only_comment)

     # Step 3: close the loop by pointing the cluster at its comment.
     singleton.primary_comment = only_comment
     storage.save(singleton)
예제 #2
0
파일: tests.py 프로젝트: tofumatt/reporter
 def make_clusters(summary, type, numcomments):
     """Build test clusters attached to ``summary``.

     A cluster is attempted for each of the sizes ``NUM_PRAISE - NUM_ISSUES``
     and ``NUM_ISSUES``, in that order. Before each one, the loop stops if the
     running total of sizes has already reached ``numcomments``. ``type`` is
     handed to ``make_comment`` unchanged.
     """
     total = 0
     for size in (NUM_PRAISE - NUM_ISSUES, NUM_ISSUES):
         if total >= numcomments:
             break
         cluster = Cluster(site_summary=summary, size=size)
         if size > 0:
             # The cluster is saved before its first comment is made, then
             # saved again once that comment has become the primary one.
             cluster.save()
             lead = make_comment(cluster, size, 0, type)
             cluster.primary_description = lead.description
             cluster.primary_comment = lead
             cluster.save()
             # The rest of the comments need no bookkeeping on the cluster.
             for index in xrange(1, size):
                 make_comment(cluster, size, index, type)
         total += size
예제 #3
0
파일: tests.py 프로젝트: x1B/reporter
 def make_clusters(summary, type, numcomments):
     """Populate ``summary`` with clusters of generated comments.

     Two cluster sizes are tried in turn, ``NUM_PRAISE - NUM_ISSUES`` then
     ``NUM_ISSUES``. Creation stops once the sizes handled so far add up to
     at least ``numcomments``. Each comment comes from ``make_comment``,
     which receives ``type`` as given.
     """
     created_so_far = 0
     for cluster_size in (NUM_PRAISE - NUM_ISSUES, NUM_ISSUES):
         if created_so_far >= numcomments:
             break
         new_cluster = Cluster(site_summary=summary, size=cluster_size)
         for position in xrange(cluster_size):
             if position:
                 # Every comment after the first is simply created.
                 make_comment(new_cluster, cluster_size, position, type)
                 continue
             # First comment: save the cluster, create the comment, then
             # record it as the cluster's primary comment and save again.
             new_cluster.save()
             head = make_comment(new_cluster, cluster_size, position, type)
             new_cluster.primary_description = head.description
             new_cluster.primary_comment = head
             new_cluster.save()
         created_so_far += cluster_size
예제 #4
0
    def generate_clusters_for(self, err, storage, group):
        """Build and store the site summary, clusters and comments for a group.

        A ``SiteSummary`` is saved for the group first. A group holding
        exactly one opinion becomes one singleton cluster. Otherwise the
        group's opinions are split by sentiment, each sentiment is clustered
        through its own ``Corpus``, and every opinion that ends up in no
        cluster gets a singleton cluster of its own.

        :param err: not used by this method.
        :param storage: object whose ``save()`` persists each model instance;
            every write in this method goes through it.
        :param group: supplies ``opinion_pks``, ``positive_counts`` (index 0
            is stored as the issues count, index 1 as the praise count) and
            ``key``, a mapping that is expanded into ``SiteSummary`` keyword
            arguments and must contain ``"positive"``.
        """
        site_summary = SiteSummary(
            pk=self.site_summary_id.next(),
            size=len(group.opinion_pks),
            issues_count=group.positive_counts[0],
            praise_count=group.positive_counts[1],
            **group.key
        )
        storage.save(site_summary)
        group_positive = group.key["positive"]

        # Handle single-comment case:
        if site_summary.size == 1:
            opinion = Opinion.objects.get(pk=group.opinion_pks[0])
            self.add_singleton_cluster(storage, site_summary, opinion)
            return

        opinions = Opinion.objects.filter(pk__in=group.opinion_pks)

        # Handle cluster case, make one corpus for positive, one for negative.
        for positive in (0, 1):
            # A group restricted to one sentiment only gets that corpus.
            if group_positive is not None and positive != group_positive:
                continue
            corpus = Corpus()
            # Opinions of this sentiment not yet placed in any cluster.
            remaining_opinions = {}
            for opinion in opinions:
                if opinion.positive != positive:
                    continue
                remaining_opinions[opinion.id] = opinion
                corpus.add(opinion, str=unicode(opinion.description))

            for found in corpus.cluster():
                # The primary document goes first with a fixed score of 1.0,
                # followed by the documents reported as similar to it.
                primary = {"object": found.primary, "similarity": 1.0}
                comments = [
                    Comment(
                        pk=self.comment_id.next(),
                        description=doc["object"].description,
                        opinion_id=doc["object"].id,
                        score=doc["similarity"],
                    )
                    for doc in [primary] + found.similars
                ]
                cluster = Cluster(
                    pk=self.cluster_id.next(),
                    site_summary=site_summary,
                    primary_description=comments[0].description,
                    primary_comment=None,
                    positive=positive,
                    size=len(comments),
                )
                # Saved once without a primary comment so that the comments
                # can be attached to it, then saved again below.
                storage.save(cluster)
                for comment in comments:
                    del remaining_opinions[comment.opinion_id]
                    comment.cluster = cluster
                    storage.save(comment)
                cluster.primary_comment = comments[0]
                # Persist through ``storage`` like every other write here,
                # rather than calling ``cluster.save()`` directly.
                storage.save(cluster)

            # Add singletons for remaining opinions
            for opinion in remaining_opinions.values():
                self.add_singleton_cluster(storage, site_summary, opinion)