def set_info(self):
    """Refresh the info labels so they describe ``self.data``.

    Without data the summary label reads 'No data on input.' and the three
    detail labels are cleared.  Otherwise the summary shows the data name
    together with the instance and feature counts taken from ``self.model``,
    and each detail label shows the formatted variable breakdown of the
    corresponding part of the domain (attributes, class variables, metas).
    """
    dataset = self.data
    if dataset is None:
        self.info_summary.setText('No data on input.')
        for label in (self.info_attr, self.info_class, self.info_meta):
            label.setText('')
        return

    name = dataset.name
    instances = plural('{number} instance{s}', self.model.n_instances)
    features = plural('{number} feature{s}', self.model.n_attributes)
    self.info_summary.setText(
        '<b>%s</b> contains %s with %s' % (name, instances, features))

    # Each detail label gets the same treatment: a bold heading followed by
    # the formatted breakdown of one group of domain variables.
    domain = dataset.domain
    sections = (
        (self.info_attr, '<b>Attributes:</b><br>%s', domain.attributes),
        (self.info_class, '<b>Class variables:</b><br>%s', domain.class_vars),
        (self.info_meta, '<b>Metas:</b><br>%s', domain.metas),
    )
    for label, template, variables in sections:
        label.setText(template % self._format_variables_string(variables))
def _format_variables_string(self, variables):
    """Describe ``variables`` as a per-type count, e.g. '2 numeric and 1 time'.

    Variables are grouped as categorical, numeric, time and string (in that
    order).  Groups whose type is listed in ``self.model.HIDDEN_VAR_TYPES``
    are marked ' (not shown)'.  Returns 'No variables' when nothing matches;
    otherwise the joined description is passed through ``plural`` together
    with the total number of counted variables.
    """
    labelled_types = (
        ('categorical', DiscreteVariable),
        ('numeric', ContinuousVariable),
        ('time', TimeVariable),
        ('string', StringVariable),
    )

    descriptions = []
    total = 0
    for label, var_type in labelled_types:
        # Exact type match rather than isinstance -- presumably so that a
        # variable of a derived type is not also counted under its parent's
        # label (NOTE(review): confirm against the variable class hierarchy).
        n_matching = len([v for v in variables if type(v) is var_type])
        if not n_matching:
            continue
        hidden = var_type in self.model.HIDDEN_VAR_TYPES
        suffix = ' (not shown)' if hidden else ''
        descriptions.append('%d %s%s' % (n_matching, label, suffix))
        total += n_matching

    if not descriptions:
        return 'No variables'

    if len(descriptions) > 1:
        *leading, last = descriptions
        var_string = ', '.join(leading) + ' and ' + last
    else:
        var_string = descriptions[0]

    return plural('%s variable{s}' % var_string, total)
def send_report(self):
    """Report every constructed feature with its expression and kind.

    Builds an ordered mapping from feature name to a description made of the
    feature's expression and a parenthesised kind: discrete (with its values
    and an ordered marker), numeric, or text for any other descriptor.  The
    mapping is handed to ``self.report_items`` under a pluralised
    'Constructed feature' heading.
    """
    descriptions = OrderedDict()
    for descriptor in self.featuremodel:
        if isinstance(descriptor, DiscreteDescriptor):
            # Multiplying by ``ordered`` appends the marker only when the
            # flag is truthy.
            ordered_marker = "; ordered" * descriptor.ordered
            text = "{} (discrete with values {}{})".format(
                descriptor.expression, descriptor.values, ordered_marker)
        elif isinstance(descriptor, ContinuousDescriptor):
            text = "{} (numeric)".format(descriptor.expression)
        else:
            text = "{} (text)".format(descriptor.expression)
        descriptions[descriptor.name] = text

    heading = report.plural("Constructed feature{s}", len(descriptions))
    self.report_items(heading, descriptions)
def send_report(self):
    """Report every constructed feature with its expression and kind.

    Builds an ordered mapping from feature name to a description made of the
    feature's expression and a parenthesised kind: categorical (with its
    values and an ordered marker), numeric, or text for any other descriptor.
    The mapping is handed to ``self.report_items`` under a pluralised
    'Constructed feature' heading.
    """
    descriptions = OrderedDict()
    for descriptor in self.featuremodel:
        if isinstance(descriptor, DiscreteDescriptor):
            # Multiplying by ``ordered`` appends the marker only when the
            # flag is truthy.
            ordered_marker = "; ordered" * descriptor.ordered
            text = "{} (categorical with values {}{})".format(
                descriptor.expression, descriptor.values, ordered_marker)
        elif isinstance(descriptor, ContinuousDescriptor):
            text = "{} (numeric)".format(descriptor.expression)
        else:
            text = "{} (text)".format(descriptor.expression)
        descriptions[descriptor.name] = text

    heading = report.plural("Constructed feature{s}", len(descriptions))
    self.report_items(heading, descriptions)
def _format_variables_string(variables):
    """Describe ``variables`` as a per-type count, e.g. '2 numeric and 1 time'.

    Variables are grouped as categorical, numeric, time and string (in that
    order).  Every variable is counted exactly once: when it is an instance
    of several listed types (which happens if one listed type derives from
    another -- NOTE(review): in Orange, TimeVariable is understood to derive
    from ContinuousVariable; confirm), it is counted under the most derived
    of them.  This keeps the per-type counts and the total used for
    pluralisation consistent with the actual number of variables.

    Variables that match none of the listed types are ignored.

    Args:
        variables: iterable of variable objects; it is traversed once.

    Returns:
        'No variables' when nothing is counted, otherwise the joined
        description passed through ``plural`` with the total count.
    """
    labelled_types = (
        ('categorical', DiscreteVariable),
        ('numeric', ContinuousVariable),
        ('time', TimeVariable),
        ('string', StringVariable),
    )

    counts = {var_type: 0 for _, var_type in labelled_types}
    for var in variables:
        most_derived = None
        for _, var_type in labelled_types:
            # Prefer a listed type that refines the best match so far, so a
            # variable is never tallied under both a class and its base.
            if isinstance(var, var_type) and (
                    most_derived is None
                    or issubclass(var_type, most_derived)):
                most_derived = var_type
        if most_derived is not None:
            counts[most_derived] += 1

    descriptions = [
        '%d %s' % (counts[var_type], label)
        for label, var_type in labelled_types
        if counts[var_type]
    ]
    if not descriptions:
        return 'No variables'

    if len(descriptions) > 1:
        var_string = ', '.join(descriptions[:-1]) + ' and ' + descriptions[-1]
    else:
        var_string = descriptions[0]

    return plural('%s variable{s}' % var_string, sum(counts.values()))
def data(self, data):
    """Update the widget's summary fields to describe ``data``.

    Sets ``data_set_size``, ``features``, ``meta_attributes``, ``targets``
    and ``location`` (display strings, partly HTML) and ``data_desc`` (an
    ordered mapping used for reporting, or ``None`` when there is no data).

    Args:
        data: the incoming data set, or ``None``.  Presumably an Orange
            table (optionally an ``SqlTable``) -- the code relies on
            ``domain``, ``approx_len()``, ``__len__`` and the
            ``X_density`` / ``Y_density`` / ``metas_density`` callables.

    Notes:
        The row count is first shown as an approximation (prefixed with
        '~') and then replaced by the exact ``len(data)`` from a helper
        thread, so that computing the exact length does not block this call.
    """
    def n_or_none(i):
        # Show "(none)" instead of a bare zero in the HTML tables.
        return i or "(none)"

    def count(s, tpe):
        return sum(isinstance(x, tpe) for x in s)

    def pack_table(rows):
        return "<table>\n" + "\n".join(
            '<tr><td align="right" width="90">%s:</td>\n'
            '<td width="40">%s</td></tr>\n' % row for row in rows
        ) + "</table>\n"

    def join_if(items):
        # Format and join only the entries with a non-zero count.
        return ", ".join(s.format(n) for s, n in items if n)

    if data is None:
        self.data_set_size = "No data"
        self.features = self.targets = self.meta_attributes = "None"
        self.location = ""
        self.data_desc = None
        return

    # A density value above 1 is treated as "sparse" here
    # (NOTE(review): confirm against the density constants of the table).
    sparse_parts = [
        part for part, density in (
            ("features", data.X_density),
            ("meta attributes", data.metas_density),
            ("targets", data.Y_density))
        if density() > 1
    ]
    if sparse_parts:
        sparses = "<p>Sparse representation: %s</p>" % ", ".join(sparse_parts)
    else:
        sparses = ""

    domain = data.domain
    self.data_set_size = pack_table((
        ("Rows", '~{}'.format(data.approx_len())),
        ("Variables", len(domain)))) + sparses

    def update_size():
        self.data_set_size = pack_table((
            ("Rows", len(data)),
            ("Variables", len(domain)))) + sparses

    threading.Thread(target=update_size).start()

    if not domain.attributes:
        self.features = "None"
    else:
        disc_features = count(domain.attributes, DiscreteVariable)
        cont_features = count(domain.attributes, ContinuousVariable)
        self.features = pack_table((
            ("Discrete", n_or_none(disc_features)),
            ("Numeric", n_or_none(cont_features))
        ))

    if not domain.metas:
        self.meta_attributes = "None"
    else:
        disc_metas = count(domain.metas, DiscreteVariable)
        cont_metas = count(domain.metas, ContinuousVariable)
        str_metas = count(domain.metas, StringVariable)
        self.meta_attributes = pack_table((
            ("Discrete", n_or_none(disc_metas)),
            ("Numeric", n_or_none(cont_metas)),
            ("Textual", n_or_none(str_metas))))

    class_var = domain.class_var
    disc_class = cont_class = 0
    if class_var:
        if class_var.is_continuous:
            self.targets = "Numeric target variable"
        else:
            self.targets = "Discrete outcome with %i values" % \
                           len(class_var.values)
    elif domain.class_vars:
        disc_class = count(domain.class_vars, DiscreteVariable)
        cont_class = count(domain.class_vars, ContinuousVariable)
        # The raw counts are used with %i: n_or_none() may return the
        # string "(none)", which %i cannot format.
        if not cont_class:
            self.targets = "Multitarget data,\n%i discrete targets" % \
                           disc_class
        elif not disc_class:
            self.targets = "Multitarget data,\n%i numeric targets" % \
                           cont_class
        else:
            self.targets = "<p>Multi target data</p>\n" + pack_table(
                (("Discrete", disc_class), ("Numeric", cont_class)))
    else:
        # Without any class variable, do not keep showing the description
        # of a previously received data set.
        self.targets = "None"

    self.data_desc = dd = OrderedDict()

    if SqlTable is not None and isinstance(data, SqlTable):
        # The password is deliberately left out of the displayed string.
        connection_string = ' '.join(
            '%s=%s' % (key, value)
            for key, value in data.connection_params.items()
            if value is not None and key != 'password')
        self.location = "Table '%s', using connection:\n%s" % (
            data.table_name, connection_string)
        dd["Rows"] = data.approx_len()
    else:
        self.location = "Data is stored in memory"
        dd["Rows"] = len(data)

    # ``and`` short-circuits, so the per-type counts are only read when the
    # corresponding branch above has actually computed them.
    dd["Features"] = len(domain.attributes) and join_if((
        ("{} discrete", disc_features),
        ("{} numeric", cont_features)
    ))

    if class_var:
        name = class_var.name
        if class_var.is_discrete:
            dd["Target"] = "discrete outcome '{}'".format(name)
        else:
            dd["Target"] = "numeric target '{}'".format(name)
    elif domain.class_vars:
        # Record the multi-target summary in the report description.
        dd["Targets"] = ", ".join(
            report.plural(template, n)
            for template, n in (
                ("{number} discrete outcome{s}", disc_class),
                ("{number} numeric target{s}", cont_class))
            if n)

    dd["Meta attributes"] = len(domain.metas) > 0 and join_if((
        ("{} discrete", disc_metas),
        ("{} numeric", cont_metas),
        ("{} textual", str_metas)
    ))
def data(self, data):
    """Update the widget's summary fields to describe ``data``.

    Sets ``data_set_size``, ``features``, ``meta_attributes``, ``targets``
    and ``location`` (display strings, partly HTML) and ``data_desc`` (an
    ordered mapping used for reporting, or ``None`` when there is no data).

    Args:
        data: the incoming data set, or ``None``.  Presumably an Orange
            table (optionally an ``SqlTable``) -- the code relies on
            ``domain``, ``approx_len()``, ``__len__`` and the
            ``X_density`` / ``Y_density`` / ``metas_density`` callables.

    Notes:
        The row count is first shown as an approximation (prefixed with
        '~') and then replaced by the exact ``len(data)`` from a helper
        thread, so that computing the exact length does not block this call.
    """
    def n_or_none(i):
        # Show "(none)" instead of a bare zero in the HTML tables.
        return i or "(none)"

    def count(s, tpe):
        return sum(isinstance(x, tpe) for x in s)

    def pack_table(rows):
        return "<table>\n" + "\n".join(
            '<tr><td align="right" width="90">%s:</td>\n'
            '<td width="40">%s</td></tr>\n' % row for row in rows
        ) + "</table>\n"

    def join_if(items):
        # Format and join only the entries with a non-zero count.
        return ", ".join(s.format(n) for s, n in items if n)

    if data is None:
        self.data_set_size = "No data"
        self.features = self.targets = self.meta_attributes = "None"
        self.location = ""
        self.data_desc = None
        return

    # A density value above 1 is treated as "sparse" here
    # (NOTE(review): confirm against the density constants of the table).
    sparse_parts = [
        part for part, density in (
            ("features", data.X_density),
            ("meta attributes", data.metas_density),
            ("targets", data.Y_density))
        if density() > 1
    ]
    if sparse_parts:
        sparses = "<p>Sparse representation: %s</p>" % ", ".join(sparse_parts)
    else:
        sparses = ""

    domain = data.domain
    self.data_set_size = pack_table(
        (("Rows", '~{}'.format(data.approx_len())),
         ("Variables", len(domain)))) + sparses

    def update_size():
        self.data_set_size = pack_table(
            (("Rows", len(data)),
             ("Variables", len(domain)))) + sparses

    threading.Thread(target=update_size).start()

    if not domain.attributes:
        self.features = "None"
    else:
        disc_features = count(domain.attributes, DiscreteVariable)
        cont_features = count(domain.attributes, ContinuousVariable)
        self.features = pack_table(
            (("Discrete", n_or_none(disc_features)),
             ("Numeric", n_or_none(cont_features))))

    if not domain.metas:
        self.meta_attributes = "None"
    else:
        disc_metas = count(domain.metas, DiscreteVariable)
        cont_metas = count(domain.metas, ContinuousVariable)
        str_metas = count(domain.metas, StringVariable)
        self.meta_attributes = pack_table(
            (("Discrete", n_or_none(disc_metas)),
             ("Numeric", n_or_none(cont_metas)),
             ("Textual", n_or_none(str_metas))))

    class_var = domain.class_var
    disc_class = cont_class = 0
    if class_var:
        if class_var.is_continuous:
            self.targets = "Numeric target variable"
        else:
            self.targets = "Discrete outcome with %i values" % \
                           len(class_var.values)
    elif domain.class_vars:
        disc_class = count(domain.class_vars, DiscreteVariable)
        cont_class = count(domain.class_vars, ContinuousVariable)
        # The raw counts are used with %i: n_or_none() may return the
        # string "(none)", which %i cannot format.
        if not cont_class:
            self.targets = "Multitarget data,\n%i discrete targets" % \
                           disc_class
        elif not disc_class:
            self.targets = "Multitarget data,\n%i numeric targets" % \
                           cont_class
        else:
            self.targets = "<p>Multi target data</p>\n" + pack_table(
                (("Discrete", disc_class), ("Numeric", cont_class)))
    else:
        # Without any class variable, do not keep showing the description
        # of a previously received data set.
        self.targets = "None"

    self.data_desc = dd = OrderedDict()

    if SqlTable is not None and isinstance(data, SqlTable):
        # The password is deliberately left out of the displayed string.
        connection_string = ' '.join(
            '%s=%s' % (key, value)
            for key, value in data.connection_params.items()
            if value is not None and key != 'password')
        self.location = "Table '%s', using connection:\n%s" % (
            data.table_name, connection_string)
        dd["Rows"] = data.approx_len()
    else:
        self.location = "Data is stored in memory"
        dd["Rows"] = len(data)

    # ``and`` short-circuits, so the per-type counts are only read when the
    # corresponding branch above has actually computed them.
    dd["Features"] = len(domain.attributes) and join_if(
        (("{} discrete", disc_features),
         ("{} numeric", cont_features)))

    if class_var:
        name = class_var.name
        if class_var.is_discrete:
            dd["Target"] = "discrete outcome '{}'".format(name)
        else:
            dd["Target"] = "numeric target '{}'".format(name)
    elif domain.class_vars:
        # Record the multi-target summary in the report description.
        dd["Targets"] = ", ".join(
            report.plural(template, n)
            for template, n in (
                ("{number} discrete outcome{s}", disc_class),
                ("{number} numeric target{s}", cont_class))
            if n)

    dd["Meta attributes"] = len(domain.metas) > 0 and join_if(
        (("{} discrete", disc_metas),
         ("{} numeric", cont_metas),
         ("{} textual", str_metas)))