def add_singleton_cluster(self, storage, site_summary, opinion):
    """Store a cluster that consists of exactly one opinion.

    A ``Cluster`` of size 1 is built from ``opinion`` and saved, then a
    single ``Comment`` (score 1.0) pointing at that cluster is saved, and
    finally the cluster is saved again with the comment as its primary
    comment.

    storage      -- object whose ``save(obj)`` receives every new record.
    site_summary -- summary the new cluster is attached to.
    opinion      -- supplies ``description``, ``positive`` and ``id``.
    """
    cluster_pk = self.cluster_id.next()
    singleton = Cluster(
        pk=cluster_pk,
        site_summary=site_summary,
        primary_description=opinion.description,
        primary_comment=None,
        positive=opinion.positive,
        size=1,
    )
    # First save happens without a primary comment: the comment does not
    # exist yet and itself refers back to this cluster.
    storage.save(singleton)

    comment_pk = self.comment_id.next()
    sole_comment = Comment(
        pk=comment_pk,
        description=opinion.description,
        opinion_id=opinion.id,
        cluster=singleton,
        score=1.0,
    )
    storage.save(sole_comment)

    # Close the loop now that the comment has been handed to storage.
    singleton.primary_comment = sole_comment
    storage.save(singleton)
def make_clusters(summary, type, numcomments):
    """Build clusters, each filled with comments, for ``summary``.

    Two cluster sizes are attempted in order: ``NUM_PRAISE - NUM_ISSUES``
    and ``NUM_ISSUES``. No further cluster is started once the running
    total of sizes has reached ``numcomments``.

    summary     -- site summary every created cluster is attached to.
    type        -- passed through unchanged to ``make_comment``.
    numcomments -- threshold on the running total of cluster sizes.
    """
    total_so_far = 0
    planned_sizes = [NUM_PRAISE - NUM_ISSUES, NUM_ISSUES]
    for csize in planned_sizes:
        if numcreated_reached(total_so_far, numcomments):
            break

        cluster = Cluster(site_summary=summary, size=csize)
        for position in xrange(csize):
            leading = position == 0
            if leading:
                # The cluster is saved before its first comment is made.
                cluster.save()

            comment = make_comment(cluster, csize, position, type)
            if not leading:
                continue

            # The first comment becomes the cluster's primary comment.
            cluster.primary_description = comment.description
            cluster.primary_comment = comment
            cluster.save()

        total_so_far += csize


def numcreated_reached(created, wanted):
    """Return True once ``created`` is at least ``wanted``."""
    return created >= wanted
def generate_clusters_for(self, err, storage, group):
    """Create the site summary, clusters and comments for one group.

    A ``SiteSummary`` is saved for the group first. A group with a single
    opinion gets one singleton cluster. Otherwise the group's opinions are
    clustered separately per sentiment value (0 and 1); every cluster the
    corpus reports is saved together with its comments, and each opinion
    that ended up in no cluster gets a singleton cluster of its own.

    err     -- not used by this method; kept for interface compatibility.
    storage -- object whose ``save(obj)`` receives every new record.
    group   -- provides ``opinion_pks``, ``positive_counts`` (indexed 0
               for issues, 1 for praise) and ``key``, a dict of
               ``SiteSummary`` keyword arguments that includes
               ``"positive"``.

    Returns None.
    """
    site_summary = SiteSummary(
        pk=self.site_summary_id.next(),
        size=len(group.opinion_pks),
        issues_count=group.positive_counts[0],
        praise_count=group.positive_counts[1],
        **group.key
    )
    storage.save(site_summary)
    group_positive = group.key["positive"]

    # Handle single-comment case:
    if site_summary.size == 1:
        opinion = Opinion.objects.get(pk=group.opinion_pks[0])
        self.add_singleton_cluster(storage, site_summary, opinion)
        return

    opinions = Opinion.objects.filter(pk__in=group.opinion_pks)

    # Handle cluster case, make one corpus for positive, one for negative.
    for positive in (0, 1):
        # A group restricted to one sentiment only needs that corpus.
        if group_positive is not None and positive != group_positive:
            continue

        corpus = Corpus()
        # Opinions of this sentiment that no cluster has claimed yet.
        remaining_opinions = {}
        for opinion in opinions:
            if opinion.positive != positive:
                continue
            remaining_opinions[opinion.id] = opinion
            corpus.add(opinion, str=unicode(opinion.description))

        for found in corpus.cluster():
            # The primary document is listed first with similarity 1.0 so
            # it is handled like any other member of the cluster.
            primary = {"object": found.primary, "similarity": 1.0}
            comments = [
                Comment(
                    pk=self.comment_id.next(),
                    description=doc["object"].description,
                    opinion_id=doc["object"].id,
                    score=doc["similarity"],
                )
                for doc in [primary] + found.similars
            ]
            cluster = Cluster(
                pk=self.cluster_id.next(),
                site_summary=site_summary,
                primary_description=comments[0].description,
                primary_comment=None,
                positive=positive,
                size=len(comments),
            )
            storage.save(cluster)

            for comment in comments:
                # Tolerate an opinion that was already claimed by an
                # earlier cluster instead of aborting with KeyError.
                remaining_opinions.pop(comment.opinion_id, None)
                comment.cluster = cluster
                storage.save(comment)

            cluster.primary_comment = comments[0]
            # Go through storage like every other write in this method
            # (and like add_singleton_cluster) rather than calling
            # cluster.save() directly.
            storage.save(cluster)

        # Add singletons for remaining opinions
        for opinion in remaining_opinions.values():
            self.add_singleton_cluster(storage, site_summary, opinion)