def set_info(self):
    """Refresh the four info labels from the current data.

    With no data, the summary label shows a notice and the attribute,
    class and meta labels are cleared. Otherwise the summary label gets
    the data name with instance/feature counts taken from ``self.model``,
    and each of the remaining labels gets a per-type description of the
    corresponding part of the domain.
    """
    if self.data is None:
        self.info_summary.setText('No data on input.')
        for label in (self.info_attr, self.info_class, self.info_meta):
            label.setText('')
        return

    name = self.data.name
    instances = plural('{number} instance{s}', self.model.n_instances)
    features = plural('{number} feature{s}', self.model.n_attributes)
    self.info_summary.setText(
        '<b>%s</b> contains %s with %s' % (name, instances, features))

    domain = self.data.domain
    # (label widget, text template, variables described in that label)
    sections = (
        (self.info_attr, '<b>Attributes:</b><br>%s', domain.attributes),
        (self.info_class, '<b>Class variables:</b><br>%s', domain.class_vars),
        (self.info_meta, '<b>Metas:</b><br>%s', domain.metas),
    )
    for label, template, variables in sections:
        label.setText(template % self._format_variables_string(variables))
def _format_variables_string(self, variables):
    """Return a text summary of how many variables of each type there are.

    Variables are counted per type (categorical, numeric, time, string).
    Types listed in ``self.model.HIDDEN_VAR_TYPES`` are marked with
    ``' (not shown)'``. Returns ``'No variables'`` when none of the four
    types occurs in ``variables``.
    """
    labelled_types = (
        ('categorical', DiscreteVariable),
        ('numeric', ContinuousVariable),
        ('time', TimeVariable),
        ('string', StringVariable),
    )

    descriptions = []
    total = 0
    for label, cls in labelled_types:
        # Exact type comparison is intentional: a `TimeVariable` is also a
        # `ContinuousVariable`, so `isinstance` would count time variables
        # under 'numeric' as well.
        n_matching = sum(
            1 for var in variables
            if type(var) is cls)  # pylint: disable=unidiomatic-typecheck
        if not n_matching:
            continue
        suffix = ' (not shown)' if cls in self.model.HIDDEN_VAR_TYPES else ''
        descriptions.append('%d %s%s' % (n_matching, label, suffix))
        total += n_matching

    if not descriptions:
        return 'No variables'

    # "a, b and c" for several entries, just "a" for a single one.
    *leading, last = descriptions
    if leading:
        var_string = ', '.join(leading) + ' and ' + last
    else:
        var_string = last
    return plural('%s variable{s}' % var_string, total)
def _format_variables_string(self, variables):
    """Summarize ``variables`` as per-type counts joined into one phrase.

    Each of the four known variable types that occurs contributes an entry
    such as ``'3 numeric'``; entries for types in
    ``self.model.HIDDEN_VAR_TYPES`` get a ``' (not shown)'`` suffix. When
    no known type occurs, ``'No variables'`` is returned.
    """
    def describe(type_name, var_cls):
        """Return ``(text, count)`` for one type, or ``None`` if absent."""
        # `type(...) is` rather than `isinstance`: `TimeVariable` derives
        # from `ContinuousVariable` and must not be counted as numeric too.
        matches = [
            var for var in variables
            if type(var) is var_cls  # pylint: disable=unidiomatic-typecheck
        ]
        if not matches:
            return None
        hidden = var_cls in self.model.HIDDEN_VAR_TYPES
        note = ' (not shown)' if hidden else ''
        return '%d %s%s' % (len(matches), type_name, note), len(matches)

    known_types = [
        ('categorical', DiscreteVariable),
        ('numeric', ContinuousVariable),
        ('time', TimeVariable),
        ('string', StringVariable),
    ]
    found = [
        entry
        for entry in (describe(name, cls) for name, cls in known_types)
        if entry is not None
    ]
    if not found:
        return 'No variables'

    texts = [text for text, _ in found]
    n_total = sum(n for _, n in found)
    if len(texts) == 1:
        var_string = texts[0]
    else:
        var_string = ' and '.join((', '.join(texts[:-1]), texts[-1]))
    return plural('%s variable{s}' % var_string, n_total)
def _set_report(self, data):
    """Build the report description of ``data``.

    Sets ``self.location`` and fills ``self.data_desc`` (an ordered
    mapping) with the entries "Name", "Rows", "Features", "Target" (only
    when the domain has class variables) and "Meta attributes".

    "Features" and "Meta attributes" are ``False`` when the domain has no
    such variables (presumably so that the report renderer skips them --
    TODO confirm against ``report_items``).
    """
    # Attributes are defined in a function called from __init__
    # pylint: disable=attribute-defined-outside-init
    domain = data.domain
    count = self._count

    self.data_desc = dd = OrderedDict()
    dd["Name"] = self.data_set_name

    if SqlTable is not None and isinstance(data, SqlTable):
        # The password is deliberately left out of the reported connection.
        connection_string = ' '.join(
            '{}={}'.format(key, value)
            for key, value in data.connection_params.items()
            if value is not None and key != 'password')
        self.location = "Table '{}', using connection:\n{}"\
            .format(data.table_name, connection_string)
        dd["Rows"] = data.approx_len()
    else:
        self.location = "Data is stored in memory"
        dd["Rows"] = len(data)

    def join_if(items):
        """Format and join only the entries whose count is non-zero."""
        return ", ".join(s.format(n) for s, n in items if n)

    dd["Features"] = len(domain.attributes) > 0 and join_if((
        ("{} categorical", count(domain.attributes, DiscreteVariable)),
        ("{} numeric", count(domain.attributes, ContinuousVariable))
    ))

    if domain.class_var:
        name = domain.class_var.name
        if domain.class_var.is_discrete:
            dd["Target"] = "categorical outcome '{}'".format(name)
        else:
            dd["Target"] = "numeric target '{}'".format(name)
    elif domain.class_vars:
        disc_class = count(domain.class_vars, DiscreteVariable)
        cont_class = count(domain.class_vars, ContinuousVariable)
        targets = []
        if disc_class:
            targets.append(
                report.plural("{number} categorical outcome{s}", disc_class))
        if cont_class:
            targets.append(
                report.plural("{number} numeric target{s}", cont_class))
        # Store the multi-target description; it used to be computed and
        # then dropped, leaving multi-target data without a "Target" entry.
        if targets:
            dd["Target"] = ", ".join(targets)

    dd["Meta attributes"] = len(domain.metas) > 0 and join_if((
        ("{} categorical", count(domain.metas, DiscreteVariable)),
        ("{} numeric", count(domain.metas, ContinuousVariable)),
        ("{} text", count(domain.metas, StringVariable))
    ))
def send_report(self):
    """Report the current PCA, metric, neighbor and resolution settings."""
    pca_description = report.bool_str(self.apply_pca)
    if self.apply_pca:
        # Append the component count only when PCA is actually applied.
        component_note = report.plural(
            ', {number} component{s}', self.pca_components)
        pca_description = pca_description + component_note

    metric_name = METRICS[self.metric_idx][0]
    settings = (
        ('PCA preprocessing', pca_description),
        ('Metric', metric_name),
        ('k neighbors', self.k_neighbors),
        ('Resolution', self.resolution),
    )
    self.report_items(settings)
def send_report(self):
    """Add the widget's settings to the report.

    The PCA entry is the on/off text from ``report.bool_str``, followed by
    the number of components when PCA preprocessing is enabled.
    """
    use_pca = self.apply_pca
    pca_text = report.bool_str(use_pca)
    pca_text += (
        report.plural(", {number} component{s}", self.pca_components)
        if use_pca else ""
    )

    rows = [
        ("PCA preprocessing", pca_text),
        ("Metric", METRICS[self.metric_idx][0]),
        ("k neighbors", self.k_neighbors),
        ("Resolution", self.resolution),
    ]
    self.report_items(tuple(rows))
def send_report(self):
    """Report normalization, PCA, metric, neighbor and resolution settings."""
    # Assemble the PCA entry piecewise: the on/off text first, then the
    # component count if PCA preprocessing is enabled.
    pca_pieces = [report.bool_str(self.apply_pca)]
    if self.apply_pca:
        pca_pieces.append(
            report.plural(", {number} component{s}", self.pca_components))
    pca_summary = "".join(pca_pieces)

    normalize_summary = report.bool_str(self.normalize)
    metric_label = METRICS[self.metric_idx][0]
    self.report_items((
        ("Normalize data", normalize_summary),
        ("PCA preprocessing", pca_summary),
        ("Metric", metric_label),
        ("k neighbors", self.k_neighbors),
        ("Resolution", self.resolution),
    ))
def set_info(self):
    """Update the info labels to describe the current data.

    When there is no data, the summary label shows a "no input data"
    message and the other labels are emptied. Otherwise the summary label
    shows the data name with the instance and feature counts from
    ``self.model``, and the attribute, class and meta labels each show a
    per-type description of their variables.
    """
    summary_label = self.info_summary
    if self.data is None:
        summary_label.setText('没有输入数据。')
        self.info_attr.setText('')
        self.info_class.setText('')
        self.info_meta.setText('')
        return

    data_name = self.data.name
    n_instances_text = plural('{number}', self.model.n_instances)
    n_features_text = plural('{number}', self.model.n_attributes)
    summary_label.setText(
        '<b>%s</b>包含%s个具有%s特征的实例'
        % (data_name, n_instances_text, n_features_text))

    describe = self._format_variables_string
    attributes_text = describe(self.data.domain.attributes)
    self.info_attr.setText('<b>属性:</b><br>%s' % attributes_text)
    class_vars_text = describe(self.data.domain.class_vars)
    self.info_class.setText('<b>分类变量:</b><br>%s' % class_vars_text)
    metas_text = describe(self.data.domain.metas)
    self.info_meta.setText('<b>元变量:</b><br>%s' % metas_text)
def send_report(self):
    """Report every constructed feature with its expression and kind.

    Discrete descriptors are reported with their values and, if ordered,
    an "; ordered" marker; continuous descriptors as numeric; everything
    else as text.
    """
    items = OrderedDict()
    for feature in self.featuremodel:
        if isinstance(feature, DiscreteDescriptor):
            template = "{} (categorical with values {}{})"
            # Multiplying by `feature.ordered` keeps or drops the marker.
            fields = (feature.expression, feature.values,
                      "; ordered" * feature.ordered)
        elif isinstance(feature, ContinuousDescriptor):
            template = "{} (numeric)"
            fields = (feature.expression,)
        else:
            template = "{} (text)"
            fields = (feature.expression,)
        items[feature.name] = template.format(*fields)

    title = report.plural("Constructed feature{s}", len(items))
    self.report_items(title, items)
def send_report(self):
    """Report every constructed feature with its expression and kind."""
    def describe_kind(descriptor):
        """Return the kind text shown in parentheses for ``descriptor``."""
        if isinstance(descriptor, DiscreteDescriptor):
            pieces = ["categorical"]
            if descriptor.values:
                quoted = ", ".join(f"'{val}'" for val in descriptor.values)
                pieces.append(" with values " + quoted)
            if descriptor.ordered:
                pieces.append("; ordered")
            return "".join(pieces)
        if isinstance(descriptor, ContinuousDescriptor):
            return "numeric"
        if isinstance(descriptor, DateTimeDescriptor):
            return "date/time"
        # Any other descriptor is reported as text.
        return "text"

    items = OrderedDict()
    for feature in self.featuremodel:
        kind = describe_kind(feature)
        items[feature.name] = f"{feature.expression} ({kind})"

    heading = report.plural("Constructed feature{s}", len(items))
    self.report_items(heading, items)
def data(self, data):
    """Summarize ``data`` into the widget's description attributes.

    Sets ``data_set_size``, ``features``, ``meta_attributes``, ``targets``,
    ``location``, ``data_attributes`` (HTML/text snippets) and
    ``data_desc`` (an ordered mapping used for the report). With
    ``data is None`` the attributes are reset to their "no data" values
    and ``data_desc`` is set to ``None``.

    Note: ``targets`` is left untouched when the domain has no class
    variables at all.
    """
    def n_or_none(i):
        """Return the count, or the text "(none)" for zero."""
        return i or "(none)"

    def count(s, tpe):
        """Count the elements of ``s`` that are instances of ``tpe``."""
        return sum(isinstance(x, tpe) for x in s)

    def pack_table(info):
        """Render ``(label, value)`` pairs as a two-column HTML table."""
        return '<table>\n' + "\n".join(
            '<tr><td align="right" width="90">%s:</td>\n'
            '<td width="40">%s</td></tr>\n'
            % (d, textwrap.shorten(str(v), width=30, placeholder="..."))
            for d, v in info) + "</table>\n"

    if data is None:
        self.data_set_size = "No data"
        self.features = self.targets = self.meta_attributes = "None"
        self.location = ""
        self.data_desc = None
        self.data_attributes = ""
        return

    # Parts whose density value is above 1 are listed as sparse.
    sparseness = [
        s for s, m in (("features", data.X_density),
                       ("meta attributes", data.metas_density),
                       ("targets", data.Y_density))
        if m() > 1
    ]
    if sparseness:
        sparseness = "<p>Sparse representation: %s</p>" % ", ".join(
            sparseness)
    else:
        sparseness = ""

    domain = data.domain
    # Show an approximate row count immediately ...
    self.data_set_size = pack_table(
        (("Rows", '~{}'.format(data.approx_len())),
         ("Columns", len(domain) + len(domain.metas)))) + sparseness

    def update_size():
        self.data_set_size = pack_table(
            (("Rows", len(data)),
             ("Columns", len(domain) + len(domain.metas)))) + sparseness

    # ... and replace it with the exact one from a background thread
    # (presumably because len(data) can be slow -- TODO confirm).
    threading.Thread(target=update_size).start()

    if not domain.attributes:
        self.features = "None"
    else:
        disc_features = count(domain.attributes, DiscreteVariable)
        cont_features = count(domain.attributes, ContinuousVariable)
        self.features = pack_table((("Discrete", n_or_none(disc_features)),
                                    ("Numeric", n_or_none(cont_features))))

    if not domain.metas:
        self.meta_attributes = "None"
    else:
        disc_metas = count(domain.metas, DiscreteVariable)
        cont_metas = count(domain.metas, ContinuousVariable)
        str_metas = count(domain.metas, StringVariable)
        self.meta_attributes = pack_table(
            (("Discrete", n_or_none(disc_metas)),
             ("Numeric", n_or_none(cont_metas)),
             ("Textual", n_or_none(str_metas))))

    class_var = domain.class_var
    if class_var:
        if class_var.is_continuous:
            self.targets = "Numeric target variable"
        else:
            self.targets = "Discrete outcome with %i values" % \
                           len(class_var.values)
    elif domain.class_vars:
        disc_class = count(domain.class_vars, DiscreteVariable)
        cont_class = count(domain.class_vars, ContinuousVariable)
        # %s rather than %i: n_or_none() returns the string "(none)" for a
        # zero count, which %i would reject with a TypeError.
        if not cont_class:
            self.targets = "Multi-target data,\n%s categorical targets" % \
                           n_or_none(disc_class)
        elif not disc_class:
            self.targets = "Multi-target data,\n%s numeric targets" % \
                           n_or_none(cont_class)
        else:
            self.targets = "<p>Multi-target data</p>\n" + pack_table(
                (("Categorical", disc_class), ("Numeric", cont_class)))

    self.data_desc = dd = OrderedDict()

    if SqlTable is not None and isinstance(data, SqlTable):
        # The password is deliberately left out of the shown connection.
        connection_string = ' '.join(
            '%s=%s' % (key, value)
            for key, value in data.connection_params.items()
            if value is not None and key != 'password')
        self.location = "Table '%s', using connection:\n%s" % (
            data.table_name, connection_string)
        dd["Rows"] = data.approx_len()
    else:
        self.location = "Data is stored in memory"
        dd["Rows"] = len(data)

    def join_if(items):
        """Format and join only the entries whose count is non-zero."""
        return ", ".join(s.format(n) for s, n in items if n)

    # `> 0` makes the entry False (not 0) for an empty domain, matching the
    # "Meta attributes" entry below. The short-circuit also keeps the
    # counts, which exist only for a non-empty domain, from being read.
    dd["Features"] = len(domain.attributes) > 0 and join_if(
        (("{} categorical", disc_features),
         ("{} numeric", cont_features)))

    if domain.class_var:
        name = domain.class_var.name
        if domain.class_var.is_discrete:
            dd["Target"] = "categorical outcome '{}'".format(name)
        else:
            dd["Target"] = "numeric target '{}'".format(name)
    elif domain.class_vars:
        target_parts = []
        if disc_class:
            target_parts.append(
                report.plural("{number} categorical outcome{s}", disc_class))
        if cont_class:
            target_parts.append(
                report.plural("{number} numeric target{s}", cont_class))
        # Store the multi-target description; it used to be built and then
        # discarded, so multi-target data had no "Target" report entry.
        if target_parts:
            dd["Target"] = ", ".join(target_parts)

    dd["Meta attributes"] = len(domain.metas) > 0 and join_if(
        (("{} categorical", disc_metas),
         ("{} numeric", cont_metas),
         ("{} textual", str_metas)))

    if data.attributes:
        self.data_attributes = pack_table(data.attributes.items())
    else:
        self.data_attributes = ""