def summarize(self, out=sys.stdout):
    """Write a plain-text report of the cluster to `out`.

    Sections, in order: the number of centroids, the data distribution,
    the field values of every centroid's center (centroids sorted by
    name), the statistics printed by each centroid and, only when the
    cluster has more than one centroid, the inter-centroid distance
    measures returned by `centroids_distance`.

    `out` is any object exposing `write`; it defaults to `sys.stdout`.
    """
    out.write(u"Cluster of %s centroids\n\n" % len(self.centroids))

    out.write(u"Data distribution:\n")
    print_distribution(self.get_data_distribution(), out=out)
    out.write(u"\n\n")

    ordered_centroids = sorted(self.centroids, key=lambda item: item.name)

    out.write(u"Centroids features:\n")
    for centroid in ordered_centroids:
        out.write(utf8(u"\n%s: " % centroid.name))
        for position, (field_id, value) in enumerate(
                centroid.center.items()):
            # Every pair but the first is preceded by a comma separator.
            separator = ", " if position else ""
            # String values are shown wrapped in double quotes.
            if isinstance(value, basestring):
                shown = u"\"%s\"" % value
            else:
                shown = value
            field_name = self.fields[field_id]['name']
            out.write(utf8(u"%s%s: %s" % (separator, field_name, shown)))
    out.write(u"\n\n")

    out.write(u"Data distance statistics:\n\n")
    for centroid in ordered_centroids:
        centroid.print_statistics(out=out)

    if len(self.centroids) <= 1:
        # A lone centroid has no other centroid to be compared with.
        return

    out.write(u"Intercentroids distance:\n\n")
    for centroid in ordered_centroids:
        out.write(utf8(u"To centroid: %s\n" % centroid.name))
        for measure, result in self.centroids_distance(centroid):
            out.write(u"%s%s: %s\n" % (INDENT, measure, result))
        out.write(u"\n")
def summarize(self, out=sys.stdout):
    """Write a plain-text report of the cluster to `out`.

    Sections, in order: the number of centroids, the data distribution,
    the field values of every centroid's center (centroids sorted by
    name), the statistics printed by each centroid and the
    inter-centroid distance measures returned by `centroids_distance`.

    The inter-centroid section is only written when the cluster has more
    than one centroid, matching the other `summarize` variants in this
    source.

    `out` is any object exposing `write`; it defaults to `sys.stdout`.
    """
    out.write(u"Cluster of %s centroids\n\n" % len(self.centroids))

    out.write(u"Data distribution:\n")
    print_distribution(self.get_data_distribution(), out=out)
    out.write(u"\n\n")

    centroids_list = sorted(self.centroids, key=lambda x: x.name)

    out.write(u"Centroids features:\n")
    for centroid in centroids_list:
        out.write(utf8(u"\n%s: " % centroid.name))
        # Empty before the first "name: value" pair, ", " afterwards.
        connector = ""
        for field_id, value in centroid.center.items():
            # String values are shown wrapped in double quotes.
            if isinstance(value, basestring):
                value = u"\"%s\"" % value
            field_name = self.fields[field_id]['name']
            out.write(utf8(u"%s%s: %s" % (connector, field_name, value)))
            connector = ", "
    out.write(u"\n\n")

    out.write(u"Data distance statistics:\n\n")
    for centroid in centroids_list:
        centroid.print_statistics(out=out)

    # With a single centroid there is no other centroid to measure a
    # distance to, so the whole section is skipped.
    if len(self.centroids) > 1:
        out.write(u"Intercentroids distance:\n\n")
        for centroid in centroids_list:
            out.write(utf8(u"To centroid: %s\n" % centroid.name))
            for measure, result in self.centroids_distance(centroid):
                out.write(u"%s%s: %s\n" % (INDENT, measure, result))
            out.write(u"\n")
def summarize(self, out=sys.stdout):
    """Write a plain-text report of the cluster to `out`.

    The header names the cluster as G-means (with its critical value) or
    K-means (with its k) depending on `self.is_g_means`. It is followed
    by the data distribution, the cluster metrics, the center of each
    centroid, the statistics printed by each centroid and, when there is
    more than one centroid, the inter-centroid distance measures.

    When `self.cluster_global` is set it is listed ahead of the
    name-sorted centroids in every per-centroid section except the
    inter-centroid one.

    `out` is any object exposing `write`; it defaults to `sys.stdout`.
    """
    if self.is_g_means:
        header = u'G-means Cluster (critical_value=%d)' % self.critical_value
    else:
        header = u'K-means Cluster (k=%d)' % self.k
    out.write(header + ' with %d centroids\n\n' % len(self.centroids))

    out.write(u"Data distribution:\n")
    # The "Global" entry goes first, ahead of the per-centroid entries.
    self.print_global_distribution(out=out)
    print_distribution(self.get_data_distribution(), out=out)
    out.write(u"\n")

    reported = []
    if self.cluster_global:
        reported.append(self.cluster_global)
    reported += sorted(self.centroids, key=lambda item: item.name)

    out.write(u"Cluster metrics:\n")
    self.print_ss_metrics(out=out)
    out.write(u"\n")

    out.write(u"Centroids:\n")
    for centroid in reported:
        out.write(utf8(u"\n%s%s: " % (INDENT, centroid.name)))
        for position, (field_id, value) in enumerate(
                centroid.center.items()):
            # Every pair but the first is preceded by a comma separator.
            separator = ", " if position else ""
            # String values are shown wrapped in double quotes.
            if isinstance(value, basestring):
                shown = u"\"%s\"" % value
            else:
                shown = value
            field_name = self.fields[field_id]['name']
            out.write(utf8(u"%s%s: %s" % (separator, field_name, shown)))
    out.write(u"\n\n")

    out.write(u"Distance distribution:\n\n")
    for centroid in reported:
        centroid.print_statistics(out=out)
    out.write(u"\n")

    if len(self.centroids) <= 1:
        # A lone centroid has no other centroid to be compared with.
        return

    out.write(u"Intercentroid distance:\n\n")
    # Leave the global entry (first position, when present) out of the
    # pairwise comparison.
    compared = reported
    if self.cluster_global:
        compared = reported[1:]
    for centroid in compared:
        out.write(utf8(u"%sTo centroid: %s\n" % (INDENT, centroid.name)))
        for measure, result in self.centroids_distance(centroid):
            out.write(u"%s%s: %s\n" % (INDENT * 2, measure, result))
        out.write(u"\n")
def summarize(self, out=sys.stdout):
    """Write the ensemble summary to `out`.

    Three sections are written in order: the distribution returned by
    `get_data_distribution("training")`, the one returned by
    `get_data_distribution("predictions")`, and the field importance
    printed by `print_importance`. `out` is flushed at the end.

    `out` is any object exposing `write` and `flush`; it defaults to
    `sys.stdout`.
    """
    sections = (
        (u"Data distribution:\n", "training"),
        (u"Predicted distribution:\n", "predictions"),
    )
    for heading, distribution_kind in sections:
        # Fetch the distribution before anything is written for it.
        distribution = self.get_data_distribution(distribution_kind)
        out.write(heading)
        print_distribution(distribution, out=out)
        out.write(u"\n\n")

    out.write(u"Field importance:\n")
    self.print_importance(out=out)
    out.flush()