def set_info(self):
    """Refresh the four info labels from the current input data.

    Without data, the summary label shows a placeholder message and the
    attribute, class and meta labels are cleared.  With data, the summary
    names the data set together with the model's instance and feature
    counts, and each remaining label receives the per-type breakdown that
    ``_format_variables_string`` produces for its part of the domain.
    """
    if self.data is None:
        self.info_summary.setText('No data on input.')
        for label in (self.info_attr, self.info_class, self.info_meta):
            label.setText('')
        return

    name = self.data.name
    instances = plural('{number} instance{s}', self.model.n_instances)
    features = plural('{number} feature{s}', self.model.n_attributes)
    self.info_summary.setText(
        '<b>%s</b> contains %s with %s' % (name, instances, features)
    )

    domain = self.data.domain
    describe = self._format_variables_string
    self.info_attr.setText(
        '<b>Attributes:</b><br>%s' % describe(domain.attributes)
    )
    self.info_class.setText(
        '<b>Class variables:</b><br>%s' % describe(domain.class_vars)
    )
    self.info_meta.setText(
        '<b>Metas:</b><br>%s' % describe(domain.metas)
    )
Пример #2
0
    def _format_variables_string(self, variables):
        """Describe ``variables`` as per-type counts in a single phrase.

        Variables are grouped by their exact class (``type(v) is ...``), so
        an instance of a subclass is not counted under its parent type.
        Types listed in ``self.model.HIDDEN_VAR_TYPES`` get a
        `` (not shown)`` suffix.  Returns ``'No variables'`` when none of
        the known types occur; otherwise returns ``plural`` applied to the
        joined phrase and the total number of counted variables.
        """
        known_types = (
            ('categorical', DiscreteVariable),
            ('numeric', ContinuousVariable),
            ('time', TimeVariable),
            ('string', StringVariable),
        )

        parts = []
        total = 0
        for label, cls in known_types:
            n_matching = sum(1 for var in variables if type(var) is cls)
            if not n_matching:
                continue
            if cls in self.model.HIDDEN_VAR_TYPES:
                suffix = ' (not shown)'
            else:
                suffix = ''
            parts.append('%d %s%s' % (n_matching, label, suffix))
            total += n_matching

        if not parts:
            return 'No variables'

        if len(parts) == 1:
            phrase = parts[0]
        else:
            # "a, b and c": commas between all but the last two entries.
            phrase = ', '.join(parts[:-1]) + ' and ' + parts[-1]
        return plural('%s variable{s}' % phrase, total)
Пример #3
0
 def send_report(self):
     """Report each constructed feature with its expression and kind.

     Builds an ordered mapping from feature name to a one-line
     description (discrete features also list their values and whether
     they are ordered) and hands it to ``report_items``.
     """
     def describe(feature):
         # One-line description; the kind is chosen by descriptor class.
         if isinstance(feature, DiscreteDescriptor):
             return "{} (discrete with values {}{})".format(
                 feature.expression, feature.values,
                 "; ordered" * feature.ordered)
         if isinstance(feature, ContinuousDescriptor):
             return "{} (numeric)".format(feature.expression)
         return "{} (text)".format(feature.expression)

     items = OrderedDict()
     for feature in self.featuremodel:
         items[feature.name] = describe(feature)

     title = report.plural("Constructed feature{s}", len(items))
     self.report_items(title, items)
Пример #4
0
 def send_report(self):
     """Report each constructed feature with its expression and kind.

     Every entry of ``self.featuremodel`` becomes one report item keyed
     by the feature name.  Categorical features additionally list their
     values and, when ordered, an "; ordered" marker.
     """
     items = OrderedDict()
     for feature in self.featuremodel:
         expression = feature.expression
         if isinstance(feature, DiscreteDescriptor):
             # Multiplying by the flag yields the marker or "".
             ordered_note = "; ordered" * feature.ordered
             text = "{} (categorical with values {}{})".format(
                 expression, feature.values, ordered_note)
         elif isinstance(feature, ContinuousDescriptor):
             text = "{} (numeric)".format(expression)
         else:
             text = "{} (text)".format(expression)
         items[feature.name] = text

     heading = report.plural("Constructed feature{s}", len(items))
     self.report_items(heading, items)
    def _format_variables_string(variables):
        """Describe ``variables`` as per-type counts in a single phrase.

        Each variable is counted exactly once, under the most derived of
        the listed types it is an instance of.  Testing every listed type
        independently with ``isinstance`` would count a variable whose
        class derives from another listed type (e.g. a time variable that
        is also a numeric variable) under both labels and inflate the
        total.  Subclasses of the listed types are still recognised.

        Returns ``'No variables'`` when no variable matches a listed type;
        otherwise returns ``plural`` applied to the joined phrase and the
        total number of counted variables.
        """
        type_labels = [
            ('categorical', DiscreteVariable),
            ('numeric', ContinuousVariable),
            ('time', TimeVariable),
            ('string', StringVariable)
        ]

        def most_specific_type(var):
            # Among the listed types `var` is an instance of, pick one that
            # has no other matching type deriving from it.
            matching = [tpe for _, tpe in type_labels if isinstance(var, tpe)]
            for candidate in matching:
                if not any(other is not candidate
                           and issubclass(other, candidate)
                           for other in matching):
                    return candidate
            return None

        tally = {}
        for var in variables:
            tpe = most_specific_type(var)
            if tpe is not None:
                tally[tpe] = tally.get(tpe, 0) + 1

        # Keep the display order of `type_labels`, skipping absent types.
        agg = [
            ('%d %s' % (tally[tpe], name), tally[tpe])
            for name, tpe in type_labels if tpe in tally
        ]

        if not agg:
            return 'No variables'

        attrs, counts = list(zip(*agg))
        if len(attrs) > 1:
            var_string = ', '.join(attrs[:-1]) + ' and ' + attrs[-1]
        else:
            var_string = attrs[0]
        return plural('%s variable{s}' % var_string, sum(counts))
Пример #6
0
    def data(self, data):
        """Summarize the input table into the widget's description fields.

        Sets ``data_set_size``, ``features``, ``meta_attributes``,
        ``targets`` and ``location`` to display strings (some contain
        HTML) and ``data_desc`` to an ordered mapping of summary items.
        When ``data`` is ``None`` the fields are reset to placeholders and
        ``data_desc`` is set to ``None``.

        The row count is first shown as an approximation
        (``data.approx_len()``); a background thread then replaces it with
        ``len(data)``.
        """
        def n_or_none(i):
            return i or "(none)"

        def count(s, tpe):
            return sum(isinstance(x, tpe) for x in s)

        def pack_table(rows):
            # Render (label, value) pairs as a two-column HTML table.
            return "<table>\n" + "\n".join(
                '<tr><td align="right" width="90">%s:</td>\n'
                '<td width="40">%s</td></tr>\n' % row for row in rows
            ) + "</table>\n"

        if data is None:
            self.data_set_size = "No data"
            self.features = self.targets = self.meta_attributes = "None"
            self.location = ""
            self.data_desc = None
            return

        sparses = [s for s, m in (("features", data.X_density),
                                  ("meta attributes", data.metas_density),
                                  ("targets", data.Y_density)) if m() > 1]
        if sparses:
            sparses = "<p>Sparse representation: %s</p>" % ", ".join(sparses)
        else:
            sparses = ""
        domain = data.domain
        self.data_set_size = pack_table((
            ("Rows", '~{}'.format(data.approx_len())),
            ("Variables", len(domain)))) + sparses

        def update_size():
            self.data_set_size = pack_table((
                ("Rows", len(data)),
                ("Variables", len(domain)))) + sparses

        # The exact length is computed off the calling thread.
        threading.Thread(target=update_size).start()

        if not domain.attributes:
            self.features = "None"
        else:
            disc_features = count(domain.attributes, DiscreteVariable)
            cont_features = count(domain.attributes, ContinuousVariable)
            self.features = pack_table((
                ("Discrete", n_or_none(disc_features)),
                ("Numeric", n_or_none(cont_features))
            ))

        if not domain.metas:
            self.meta_attributes = "None"
        else:
            disc_metas = count(domain.metas, DiscreteVariable)
            cont_metas = count(domain.metas, ContinuousVariable)
            str_metas = count(domain.metas, StringVariable)
            self.meta_attributes = pack_table((
                ("Discrete", n_or_none(disc_metas)),
                ("Numeric", n_or_none(cont_metas)),
                ("Textual", n_or_none(str_metas))))

        class_var = domain.class_var
        disc_class = cont_class = 0
        if class_var:
            if class_var.is_continuous:
                self.targets = "Numeric target variable"
            else:
                self.targets = "Discrete outcome with %i values" % \
                               len(class_var.values)
        elif domain.class_vars:
            disc_class = count(domain.class_vars, DiscreteVariable)
            cont_class = count(domain.class_vars, ContinuousVariable)
            # %s rather than %i: n_or_none may return the string "(none)".
            if not cont_class:
                self.targets = "Multitarget data,\n%s discrete targets" % \
                               n_or_none(disc_class)
            elif not disc_class:
                self.targets = "Multitarget data,\n%s numeric targets" % \
                               n_or_none(cont_class)
            else:
                self.targets = "<p>Multi target data</p>\n" + pack_table(
                    (("Discrete", disc_class), ("Numeric", cont_class)))
        else:
            # Without this, the text from a previous data set would remain.
            self.targets = "None"

        self.data_desc = dd = OrderedDict()

        if SqlTable is not None and isinstance(data, SqlTable):
            # The password is deliberately left out of the shown string.
            connection_string = ' '.join(
                '%s=%s' % (key, value)
                for key, value in data.connection_params.items()
                if value is not None and key != 'password')
            self.location = "Table '%s', using connection:\n%s" % (
                data.table_name, connection_string)
            dd["Rows"] = data.approx_len()
        else:
            self.location = "Data is stored in memory"
            dd["Rows"] = len(data)

        def join_if(items):
            return ", ".join(s.format(n) for s, n in items if n)

        # `and` short-circuits, so the per-type counts are only read when
        # the branch that computed them above has run.
        dd["Features"] = len(domain.attributes) and join_if((
            ("{} discrete", disc_features),
            ("{} numeric", cont_features)
        ))
        if class_var:
            name = class_var.name
            if class_var.is_discrete:
                dd["Target"] = "discrete outcome '{}'".format(name)
            else:
                dd["Target"] = "numeric target '{}'".format(name)
        elif domain.class_vars:
            # Store the multi-target summary; it used to be built and dropped.
            targets_desc = ", ".join(
                report.plural(template, n)
                for template, n in (
                    ("{number} discrete outcome{s}", disc_class),
                    ("{number} numeric target{s}", cont_class))
                if n)
            if targets_desc:
                dd["Targets"] = targets_desc
        dd["Meta attributes"] = len(domain.metas) > 0 and join_if((
            ("{} discrete", disc_metas),
            ("{} numeric", cont_metas),
            ("{} textual", str_metas)
        ))
Пример #7
0
    def data(self, data):
        """Summarize the input table into the widget's description fields.

        Sets ``data_set_size``, ``features``, ``meta_attributes``,
        ``targets`` and ``location`` to display strings (some contain
        HTML) and ``data_desc`` to an ordered mapping of summary items.
        When ``data`` is ``None`` the fields are reset to placeholders and
        ``data_desc`` is set to ``None``.

        The row count is first shown as an approximation
        (``data.approx_len()``); a background thread then replaces it with
        ``len(data)``.
        """
        def n_or_none(i):
            return i or "(none)"

        def count(s, tpe):
            return sum(isinstance(x, tpe) for x in s)

        def pack_table(rows):
            # Render (label, value) pairs as a two-column HTML table.
            return "<table>\n" + "\n".join(
                '<tr><td align="right" width="90">%s:</td>\n'
                '<td width="40">%s</td></tr>\n' % row
                for row in rows) + "</table>\n"

        if data is None:
            self.data_set_size = "No data"
            self.features = self.targets = self.meta_attributes = "None"
            self.location = ""
            self.data_desc = None
            return

        sparses = [
            s for s, m in (("features", data.X_density),
                           ("meta attributes", data.metas_density),
                           ("targets", data.Y_density)) if m() > 1
        ]
        if sparses:
            sparses = "<p>Sparse representation: %s</p>" % ", ".join(sparses)
        else:
            sparses = ""
        domain = data.domain
        self.data_set_size = pack_table(
            (("Rows", '~{}'.format(data.approx_len())),
             ("Variables", len(domain)))) + sparses

        def update_size():
            self.data_set_size = pack_table(
                (("Rows", len(data)), ("Variables", len(domain)))) + sparses

        # The exact length is computed off the calling thread.
        threading.Thread(target=update_size).start()

        if not domain.attributes:
            self.features = "None"
        else:
            disc_features = count(domain.attributes, DiscreteVariable)
            cont_features = count(domain.attributes, ContinuousVariable)
            self.features = pack_table((("Discrete", n_or_none(disc_features)),
                                        ("Numeric", n_or_none(cont_features))))

        if not domain.metas:
            self.meta_attributes = "None"
        else:
            disc_metas = count(domain.metas, DiscreteVariable)
            cont_metas = count(domain.metas, ContinuousVariable)
            str_metas = count(domain.metas, StringVariable)
            self.meta_attributes = pack_table(
                (("Discrete", n_or_none(disc_metas)),
                 ("Numeric", n_or_none(cont_metas)),
                 ("Textual", n_or_none(str_metas))))

        class_var = domain.class_var
        disc_class = cont_class = 0
        if class_var:
            if class_var.is_continuous:
                self.targets = "Numeric target variable"
            else:
                self.targets = "Discrete outcome with %i values" % \
                               len(class_var.values)
        elif domain.class_vars:
            disc_class = count(domain.class_vars, DiscreteVariable)
            cont_class = count(domain.class_vars, ContinuousVariable)
            # %s rather than %i: n_or_none may return the string "(none)".
            if not cont_class:
                self.targets = "Multitarget data,\n%s discrete targets" % \
                               n_or_none(disc_class)
            elif not disc_class:
                self.targets = "Multitarget data,\n%s numeric targets" % \
                               n_or_none(cont_class)
            else:
                self.targets = "<p>Multi target data</p>\n" + pack_table(
                    (("Discrete", disc_class), ("Numeric", cont_class)))
        else:
            # Without this, the text from a previous data set would remain.
            self.targets = "None"

        self.data_desc = dd = OrderedDict()

        if SqlTable is not None and isinstance(data, SqlTable):
            # The password is deliberately left out of the shown string.
            connection_string = ' '.join(
                '%s=%s' % (key, value)
                for key, value in data.connection_params.items()
                if value is not None and key != 'password')
            self.location = "Table '%s', using connection:\n%s" % (
                data.table_name, connection_string)
            dd["Rows"] = data.approx_len()
        else:
            self.location = "Data is stored in memory"
            dd["Rows"] = len(data)

        def join_if(items):
            return ", ".join(s.format(n) for s, n in items if n)

        # `and` short-circuits, so the per-type counts are only read when
        # the branch that computed them above has run.
        dd["Features"] = len(domain.attributes) and join_if(
            (("{} discrete", disc_features), ("{} numeric", cont_features)))
        if class_var:
            name = class_var.name
            if class_var.is_discrete:
                dd["Target"] = "discrete outcome '{}'".format(name)
            else:
                dd["Target"] = "numeric target '{}'".format(name)
        elif domain.class_vars:
            # Store the multi-target summary; it used to be built and dropped.
            targets_desc = ", ".join(
                report.plural(template, n)
                for template, n in (
                    ("{number} discrete outcome{s}", disc_class),
                    ("{number} numeric target{s}", cont_class))
                if n)
            if targets_desc:
                dd["Targets"] = targets_desc
        dd["Meta attributes"] = len(domain.metas) > 0 and join_if(
            (("{} discrete", disc_metas), ("{} numeric", cont_metas),
             ("{} textual", str_metas)))