Пример #1
0
    def set_info(self):
        """Refresh the summary, attribute, class and meta info labels.

        Without data the summary label shows a placeholder message and the
        three remaining labels are emptied. With data the summary names the
        data set together with the instance and feature counts read from
        ``self.model``, and each remaining label shows the formatted
        variable breakdown of one part of the data domain.
        """
        # Guard clause: nothing to describe when there is no input.
        if self.data is None:
            self.info_summary.setText('No data on input.')
            self.info_attr.setText('')
            self.info_class.setText('')
            self.info_meta.setText('')
            return

        name = self.data.name
        instances = plural('{number} instance{s}', self.model.n_instances)
        features = plural('{number} feature{s}', self.model.n_attributes)
        self.info_summary.setText(
            '<b>%s</b> contains %s with %s' % (name, instances, features))

        attributes_text = self._format_variables_string(
            self.data.domain.attributes)
        self.info_attr.setText('<b>Attributes:</b><br>%s' % attributes_text)

        class_text = self._format_variables_string(
            self.data.domain.class_vars)
        self.info_class.setText('<b>Class variables:</b><br>%s' % class_text)

        metas_text = self._format_variables_string(self.data.domain.metas)
        self.info_meta.setText('<b>Metas:</b><br>%s' % metas_text)
Пример #2
0
    def set_info(self):
        """Update the four info labels to describe ``self.data``.

        When ``self.data`` is None the summary label gets a "no data"
        message and the attribute, class and meta labels are cleared.
        Otherwise the summary combines the data set name with the instance
        and feature counts from ``self.model``, and the other labels show
        the formatted variable listing for the corresponding domain part.
        """
        data = self.data
        if data is None:
            self.info_summary.setText('No data on input.')
            self.info_attr.setText('')
            self.info_class.setText('')
            self.info_meta.setText('')
            return

        summary_fields = (
            data.name,
            plural('{number} instance{s}', self.model.n_instances),
            plural('{number} feature{s}', self.model.n_attributes),
        )
        self.info_summary.setText(
            '<b>%s</b> contains %s with %s' % summary_fields)

        describe = self._format_variables_string
        self.info_attr.setText(
            '<b>Attributes:</b><br>%s' % describe(data.domain.attributes))
        self.info_class.setText(
            '<b>Class variables:</b><br>%s' % describe(data.domain.class_vars))
        self.info_meta.setText(
            '<b>Metas:</b><br>%s' % describe(data.domain.metas))
Пример #3
0
    def _format_variables_string(self, variables):
        """Describe `variables` as counts per exact variable type.

        Each type that occurs contributes "<count> <label>", followed by
        " (not shown)" when the type is listed in
        ``self.model.HIDDEN_VAR_TYPES``. The pieces are joined with commas
        and a final "and", then passed through `plural` together with the
        total count. Returns 'No variables' when none of the known types
        occurs.
        """
        labelled_types = (
            ('categorical', DiscreteVariable),
            ('numeric', ContinuousVariable),
            ('time', TimeVariable),
            ('string', StringVariable),
        )

        pieces = []
        total = 0
        for label, variable_class in labelled_types:
            # Exact type comparison is intentional: a `TimeVariable` is also
            # a `ContinuousVariable`, so `isinstance` would count time
            # variables as numeric as well.
            matching = sum(1 for v in variables if type(v) is variable_class)  # pylint: disable=unidiomatic-typecheck
            if not matching:
                continue
            if variable_class in self.model.HIDDEN_VAR_TYPES:
                suffix = ' (not shown)'
            else:
                suffix = ''
            pieces.append('%d %s%s' % (matching, label, suffix))
            total += matching

        if not pieces:
            return 'No variables'

        if len(pieces) == 1:
            description = pieces[0]
        else:
            description = ', '.join(pieces[:-1]) + ' and ' + pieces[-1]
        return plural('%s variable{s}' % description, total)
Пример #4
0
    def _format_variables_string(self, variables):
        """Summarise `variables` by exact type as a human-readable string.

        For every known type present, a "<count> <label>" fragment is
        produced, with " (not shown)" appended when the type is a member of
        ``self.model.HIDDEN_VAR_TYPES``. Fragments are joined with ", " and
        a trailing " and ", and the result is run through `plural` with the
        overall count. 'No variables' is returned when no fragment exists.
        """
        fragments = []
        fragment_counts = []
        for type_label, type_class in (
                ('categorical', DiscreteVariable),
                ('numeric', ContinuousVariable),
                ('time', TimeVariable),
                ('string', StringVariable),
        ):
            # `type(...) is ...` rather than `isinstance`: a `TimeVariable`
            # is also a `ContinuousVariable` and must not be counted twice.
            of_this_type = [
                var for var in variables
                if type(var) is type_class  # pylint: disable=unidiomatic-typecheck
            ]
            if not of_this_type:
                continue
            is_hidden = type_class in self.model.HIDDEN_VAR_TYPES
            note = ' (not shown)' if is_hidden else ''
            fragments.append(
                '%d %s%s' % (len(of_this_type), type_label, note))
            fragment_counts.append(len(of_this_type))

        if not fragments:
            return 'No variables'

        if len(fragments) > 1:
            *leading, last = fragments
            joined = ', '.join(leading) + ' and ' + last
        else:
            joined = fragments[0]
        return plural('%s variable{s}' % joined, sum(fragment_counts))
Пример #5
0
    def _set_report(self, data):
        """Prepare the report description of `data`.

        Sets ``self.data_desc`` to an ordered mapping with the rows "Name",
        "Rows", "Features", optionally "Target", and "Meta attributes", and
        sets ``self.location`` to a text saying where the data lives.

        For data with several class variables the "Target" row lists the
        number of categorical outcomes and numeric targets, comma-separated.
        Previously this text was built but never stored.

        Args:
            data: the data table to describe; when it is an `SqlTable` the
                location contains the connection parameters (without the
                password) and the row count is the table's approximate
                length.
        """
        # Attributes are defined in a function called from __init__
        # pylint: disable=attribute-defined-outside-init
        domain = data.domain
        count = self._count

        self.data_desc = dd = OrderedDict()
        dd["Name"] = self.data_set_name

        if SqlTable is not None and isinstance(data, SqlTable):
            # Never include the password in the reported connection string.
            connection_string = ' '.join(
                '{}={}'.format(key, value)
                for key, value in data.connection_params.items()
                if value is not None and key != 'password')
            self.location = "Table '{}', using connection:\n{}"\
                            .format(data.table_name, connection_string)
            dd["Rows"] = data.approx_len()
        else:
            self.location = "Data is stored in memory"
            dd["Rows"] = len(data)

        def join_if(items):
            # Format and join only the entries whose count is non-zero.
            return ", ".join(s.format(n) for s, n in items if n)

        # `False` (not an empty string) is stored when there are no features.
        dd["Features"] = len(domain.attributes) > 0 and join_if((
            ("{} categorical", count(domain.attributes, DiscreteVariable)),
            ("{} numeric", count(domain.attributes, ContinuousVariable))
        ))
        if domain.class_var:
            name = domain.class_var.name
            if domain.class_var.is_discrete:
                dd["Target"] = "categorical outcome '{}'".format(name)
            else:
                dd["Target"] = "numeric target '{}'".format(name)
        elif domain.class_vars:
            disc_class = count(domain.class_vars, DiscreteVariable)
            cont_class = count(domain.class_vars, ContinuousVariable)
            targets = []
            if disc_class:
                targets.append(report.plural(
                    "{number} categorical outcome{s}", disc_class))
            if cont_class:
                targets.append(report.plural(
                    "{number} numeric target{s}", cont_class))
            if targets:
                dd["Target"] = ", ".join(targets)
        dd["Meta attributes"] = len(domain.metas) > 0 and join_if((
            ("{} categorical", count(domain.metas, DiscreteVariable)),
            ("{} numeric", count(domain.metas, ContinuousVariable)),
            ("{} text", count(domain.metas, StringVariable))
        ))
Пример #6
0
    def _set_report(self, data):
        """Prepare the report description of `data`.

        Sets ``self.data_desc`` to an ordered mapping with the rows "Name",
        "Rows", "Features", optionally "Target", and "Meta attributes", and
        sets ``self.location`` to a text saying where the data lives.

        For data with several class variables the "Target" row lists the
        number of categorical outcomes and numeric targets, comma-separated.
        Previously this text was built but never stored.

        Args:
            data: the data table to describe; when it is an `SqlTable` the
                location contains the connection parameters (without the
                password) and the row count is the table's approximate
                length.
        """
        # Attributes are defined in a function called from __init__
        # pylint: disable=attribute-defined-outside-init
        domain = data.domain
        count = self._count

        self.data_desc = dd = OrderedDict()
        dd["Name"] = self.data_set_name

        if SqlTable is not None and isinstance(data, SqlTable):
            # Never include the password in the reported connection string.
            connection_string = ' '.join(
                '{}={}'.format(key, value)
                for key, value in data.connection_params.items()
                if value is not None and key != 'password')
            self.location = "Table '{}', using connection:\n{}"\
                            .format(data.table_name, connection_string)
            dd["Rows"] = data.approx_len()
        else:
            self.location = "Data is stored in memory"
            dd["Rows"] = len(data)

        def join_if(items):
            # Format and join only the entries whose count is non-zero.
            return ", ".join(s.format(n) for s, n in items if n)

        # `False` (not an empty string) is stored when there are no features.
        dd["Features"] = len(domain.attributes) > 0 and join_if((
            ("{} categorical", count(domain.attributes, DiscreteVariable)),
            ("{} numeric", count(domain.attributes, ContinuousVariable))
        ))
        if domain.class_var:
            name = domain.class_var.name
            if domain.class_var.is_discrete:
                dd["Target"] = "categorical outcome '{}'".format(name)
            else:
                dd["Target"] = "numeric target '{}'".format(name)
        elif domain.class_vars:
            disc_class = count(domain.class_vars, DiscreteVariable)
            cont_class = count(domain.class_vars, ContinuousVariable)
            targets = []
            if disc_class:
                targets.append(report.plural(
                    "{number} categorical outcome{s}", disc_class))
            if cont_class:
                targets.append(report.plural(
                    "{number} numeric target{s}", cont_class))
            if targets:
                dd["Target"] = ", ".join(targets)
        dd["Meta attributes"] = len(domain.metas) > 0 and join_if((
            ("{} categorical", count(domain.metas, DiscreteVariable)),
            ("{} numeric", count(domain.metas, ContinuousVariable)),
            ("{} text", count(domain.metas, StringVariable))
        ))
Пример #7
0
    def send_report(self):
        """Report the PCA, metric, neighbour and resolution settings.

        The PCA entry is the yes/no text for ``self.apply_pca``; when PCA
        is applied, the number of components is appended to it.
        """
        pca_text = report.bool_str(self.apply_pca)
        if self.apply_pca:
            components = report.plural(
                ', {number} component{s}', self.pca_components)
            pca_text = pca_text + components

        metric_name = METRICS[self.metric_idx][0]
        rows = (
            ('PCA preprocessing', pca_text),
            ('Metric', metric_name),
            ('k neighbors', self.k_neighbors),
            ('Resolution', self.resolution),
        )
        self.report_items(rows)
Пример #8
0
    def send_report(self):
        """Add the current settings to the report.

        Reported rows: PCA preprocessing (with the component count when
        ``self.apply_pca`` is set), the selected metric's first entry in
        `METRICS`, the number of neighbours, and the resolution.
        """
        pca_parts = [report.bool_str(self.apply_pca)]
        if self.apply_pca:
            pca_parts.append(
                report.plural(", {number} component{s}", self.pca_components))
        pca_summary = "".join(pca_parts)

        self.report_items((
            ("PCA preprocessing", pca_summary),
            ("Metric", METRICS[self.metric_idx][0]),
            ("k neighbors", self.k_neighbors),
            ("Resolution", self.resolution),
        ))
Пример #9
0
    def send_report(self):
        """Send the PCA, metric, k-neighbours and resolution rows to the report.

        The PCA row holds the boolean text for ``self.apply_pca`` and, if
        PCA is enabled, the pluralised number of components.
        """
        use_pca = self.apply_pca
        pca_row = report.bool_str(use_pca)
        if self.apply_pca:
            pca_row += report.plural(
                ', {number} component{s}', self.pca_components)

        metric_label = METRICS[self.metric_idx][0]
        self.report_items((
            ('PCA preprocessing', pca_row),
            ('Metric', metric_label),
            ('k neighbors', self.k_neighbors),
            ('Resolution', self.resolution),
        ))
Пример #10
0
    def send_report(self):
        """Report normalisation, PCA, metric, neighbour and resolution settings.

        The PCA entry is the yes/no text for ``self.apply_pca``, extended
        with the number of components when PCA is applied.
        """
        pca_text = report.bool_str(self.apply_pca)
        if self.apply_pca:
            component_text = report.plural(
                ", {number} component{s}", self.pca_components)
            pca_text = pca_text + component_text

        normalize_text = report.bool_str(self.normalize)
        metric_name = METRICS[self.metric_idx][0]
        rows = (
            ("Normalize data", normalize_text),
            ("PCA preprocessing", pca_text),
            ("Metric", metric_name),
            ("k neighbors", self.k_neighbors),
            ("Resolution", self.resolution),
        )
        self.report_items(rows)
Пример #11
0
    def set_info(self):
        """Refresh the summary, attribute, class and meta info labels.

        Without data the summary label shows the "no input data" message
        and the other three labels are emptied. With data the summary
        combines the data set name with the instance and attribute counts
        from ``self.model``, and each remaining label shows the formatted
        variable breakdown of one part of the data domain.
        """
        if self.data is None:
            self.info_summary.setText('没有输入数据。')
            self.info_attr.setText('')
            self.info_class.setText('')
            self.info_meta.setText('')
            return

        summary_fields = (
            self.data.name,
            plural('{number}', self.model.n_instances),
            plural('{number}', self.model.n_attributes),
        )
        self.info_summary.setText(
            '<b>%s</b>包含%s个具有%s特征的实例' % summary_fields)

        attributes_text = self._format_variables_string(
            self.data.domain.attributes)
        self.info_attr.setText('<b>属性:</b><br>%s' % attributes_text)

        class_text = self._format_variables_string(
            self.data.domain.class_vars)
        self.info_class.setText('<b>分类变量:</b><br>%s' % class_text)

        metas_text = self._format_variables_string(self.data.domain.metas)
        self.info_meta.setText('<b>元变量:</b><br>%s' % metas_text)
Пример #12
0
 def send_report(self):
     """Report every constructed feature with its expression and kind.

     Discrete descriptors are reported as categorical together with their
     values and an "; ordered" marker, continuous descriptors as numeric,
     and anything else as text. The section title is pluralised by the
     number of reported entries.
     """
     described = OrderedDict()
     for descriptor in self.featuremodel:
         if isinstance(descriptor, DiscreteDescriptor):
             # Multiplying by `ordered` keeps the marker only when it is set.
             summary = "{} (categorical with values {}{})".format(
                 descriptor.expression,
                 descriptor.values,
                 "; ordered" * descriptor.ordered,
             )
         elif isinstance(descriptor, ContinuousDescriptor):
             summary = "{} (numeric)".format(descriptor.expression)
         else:
             summary = "{} (text)".format(descriptor.expression)
         described[descriptor.name] = summary

     title = report.plural("Constructed feature{s}", len(described))
     self.report_items(title, described)
Пример #13
0
 def send_report(self):
     """Add the list of constructed features to the report.

     Each entry maps the feature name to its expression followed by the
     kind in parentheses: categorical (with values and an optional
     "; ordered" marker) for discrete descriptors, numeric for continuous
     descriptors, and text otherwise.
     """
     entries = OrderedDict()
     for item in self.featuremodel:
         is_discrete = isinstance(item, DiscreteDescriptor)
         if is_discrete:
             template = "{} (categorical with values {}{})"
             # `ordered` acts as a 0/1 repeat count for the marker.
             fields = (item.expression, item.values,
                       "; ordered" * item.ordered)
         elif isinstance(item, ContinuousDescriptor):
             template = "{} (numeric)"
             fields = (item.expression,)
         else:
             template = "{} (text)"
             fields = (item.expression,)
         entries[item.name] = template.format(*fields)

     self.report_items(
         report.plural("Constructed feature{s}", len(entries)), entries)
Пример #14
0
 def send_report(self):
     """Report every constructed feature with its expression and kind.

     The kind is "categorical" (optionally followed by the quoted values
     and "; ordered") for discrete descriptors, "numeric" for continuous
     ones, "date/time" for date-time descriptors and "text" for the rest.
     """
     described = OrderedDict()
     for descriptor in self.featuremodel:
         if isinstance(descriptor, DiscreteDescriptor):
             kind_parts = ["categorical"]
             if descriptor.values:
                 quoted_values = ", ".join(
                     f"'{value}'" for value in descriptor.values)
                 kind_parts.append(" with values " + quoted_values)
             if descriptor.ordered:
                 kind_parts.append("; ordered")
             kind = "".join(kind_parts)
         elif isinstance(descriptor, ContinuousDescriptor):
             kind = "numeric"
         elif isinstance(descriptor, DateTimeDescriptor):
             kind = "date/time"
         else:
             kind = "text"
         described[descriptor.name] = f"{descriptor.expression} ({kind})"

     title = report.plural("Constructed feature{s}", len(described))
     self.report_items(title, described)
Пример #15
0
    def data(self, data):
        """Update all descriptive attributes of the widget for `data`.

        Sets ``data_set_size``, ``features``, ``meta_attributes``,
        ``targets``, ``location``, ``data_attributes`` (display texts) and
        ``data_desc`` (ordered mapping used for reporting).

        Args:
            data: the input table, or None. With None, every text is reset
                to its "no data" value, ``data_desc`` becomes None and the
                method returns immediately.

        Notes:
            * The size is first shown from ``data.approx_len()`` and then
              replaced with the exact ``len(data)`` from a background
              thread.
            * Multi-target counts of zero are shown as "(none)"; they are
              formatted with ``%s`` because ``%i`` raises TypeError on that
              placeholder string.
            * For multi-target data the report's "Target" row lists the
              categorical and numeric target counts.
        """
        def n_or_none(i):
            # Show a placeholder instead of a zero count.
            return i or "(none)"

        def count(s, tpe):
            return sum(isinstance(x, tpe) for x in s)

        def pack_table(info):
            # Render (label, value) pairs as a two-column HTML table;
            # values are shortened to at most 30 characters.
            return '<table>\n' + "\n".join(
                '<tr><td align="right" width="90">%s:</td>\n'
                '<td width="40">%s</td></tr>\n' %
                (d, textwrap.shorten(str(v), width=30, placeholder="..."))
                for d, v in info) + "</table>\n"

        if data is None:
            self.data_set_size = "No data"
            self.features = self.targets = self.meta_attributes = "None"
            self.location = ""
            self.data_desc = None
            self.data_attributes = ""
            return

        # A density value above 1 is treated as a sparse representation
        # (presumably the storage constants for sparse data - confirm).
        sparseness = [
            s for s, m in (("features", data.X_density),
                           ("meta attributes", data.metas_density),
                           ("targets", data.Y_density)) if m() > 1
        ]
        if sparseness:
            sparseness = "<p>Sparse representation: %s</p>" % ", ".join(
                sparseness)
        else:
            sparseness = ""
        domain = data.domain
        self.data_set_size = pack_table(
            (("Rows", '~{}'.format(data.approx_len())),
             ("Columns", len(domain) + len(domain.metas)))) + sparseness

        def update_size():
            self.data_set_size = pack_table(
                (("Rows", len(data)),
                 ("Columns", len(domain) + len(domain.metas)))) + sparseness

        # The exact length is computed off the calling thread.
        threading.Thread(target=update_size).start()

        if not domain.attributes:
            self.features = "None"
        else:
            disc_features = count(domain.attributes, DiscreteVariable)
            cont_features = count(domain.attributes, ContinuousVariable)
            self.features = pack_table((("Discrete", n_or_none(disc_features)),
                                        ("Numeric", n_or_none(cont_features))))

        if not domain.metas:
            self.meta_attributes = "None"
        else:
            disc_metas = count(domain.metas, DiscreteVariable)
            cont_metas = count(domain.metas, ContinuousVariable)
            str_metas = count(domain.metas, StringVariable)
            self.meta_attributes = pack_table(
                (("Discrete", n_or_none(disc_metas)),
                 ("Numeric", n_or_none(cont_metas)),
                 ("Textual", n_or_none(str_metas))))

        class_var = domain.class_var
        if class_var:
            if class_var.is_continuous:
                self.targets = "Numeric target variable"
            else:
                self.targets = "Discrete outcome with %i values" % \
                               len(class_var.values)
        elif domain.class_vars:
            disc_class = count(domain.class_vars, DiscreteVariable)
            cont_class = count(domain.class_vars, ContinuousVariable)
            # `%s`, not `%i`: n_or_none() returns the string "(none)" for 0.
            if not cont_class:
                self.targets = "Multi-target data,\n%s categorical targets" % \
                               n_or_none(disc_class)
            elif not disc_class:
                self.targets = "Multi-target data,\n%s numeric targets" % \
                               n_or_none(cont_class)
            else:
                self.targets = "<p>Multi-target data</p>\n" + pack_table(
                    (("Categorical", disc_class), ("Numeric", cont_class)))

        self.data_desc = dd = OrderedDict()

        if SqlTable is not None and isinstance(data, SqlTable):
            # Never include the password in the displayed connection string.
            connection_string = ' '.join(
                '%s=%s' % (key, value)
                for key, value in data.connection_params.items()
                if value is not None and key != 'password')
            self.location = "Table '%s', using connection:\n%s" % (
                data.table_name, connection_string)
            dd["Rows"] = data.approx_len()
        else:
            self.location = "Data is stored in memory"
            dd["Rows"] = len(data)

        def join_if(items):
            # Format and join only the entries whose count is non-zero.
            return ", ".join(s.format(n) for s, n in items if n)

        # The `and` short-circuits, so the counts (only defined when the
        # corresponding domain part is non-empty) are never read otherwise.
        dd["Features"] = len(domain.attributes) and join_if(
            (("{} categorical", disc_features), ("{} numeric", cont_features)))
        if domain.class_var:
            name = domain.class_var.name
            if domain.class_var.is_discrete:
                dd["Target"] = "categorical outcome '{}'".format(name)
            else:
                dd["Target"] = "numeric target '{}'".format(name)
        elif domain.class_vars:
            # disc_class / cont_class were computed in the matching branch
            # above.
            target_parts = []
            if disc_class:
                target_parts.append(report.plural(
                    "{number} categorical outcome{s}", disc_class))
            if cont_class:
                target_parts.append(report.plural(
                    "{number} numeric target{s}", cont_class))
            if target_parts:
                dd["Target"] = ", ".join(target_parts)
        dd["Meta attributes"] = len(domain.metas) > 0 and join_if(
            (("{} categorical", disc_metas), ("{} numeric", cont_metas),
             ("{} textual", str_metas)))

        if data.attributes:
            self.data_attributes = pack_table(data.attributes.items())
        else:
            self.data_attributes = ""