示例#1
0
文件: widget.py 项目: zyblx/orange3
 def set_output_summary(self, data):
     """Report ``data`` in the widget's output summary.

     For truthy ``data`` the brief part is its length and the details come
     from ``format_summary_details``; otherwise ``self.info.NoOutput`` is
     shown with empty details.
     """
     if data:
         brief = len(data)
         description = format_summary_details(data)
     else:
         brief = self.info.NoOutput
         description = ""
     self.info.set_output_summary(brief, description)
示例#2
0
    def commit(self):
        """Apply the configured conditions to the data and send the outputs.

        A filter is built for each entry of ``self.conditions``. The
        combined filter yields the matching rows and, once negated, the
        non-matching rows; an annotated table is created from the input and
        the mask of matching rows. The outputs are optionally purged, sent,
        described for the report, and the output summary is updated.

        If ``_values_to_floats`` raises ``ValueError``, the parsing error is
        shown and the method returns without sending anything.
        """
        # With no data, or no usable conditions, the input itself is passed
        # on as the matching output.
        matching_output = self.data
        non_matching_output = None
        annotated_output = None

        self.Error.clear()
        if self.data:
            domain = self.data.domain
            conditions = []
            for attr_name, oper_idx, values in self.conditions:
                if attr_name in self.AllTypes:
                    # Condition that is not bound to one variable of the
                    # domain: type and operators are looked up by its name.
                    attr_index = attr = None
                    attr_type = self.AllTypes[attr_name]
                    operators = self.Operators[attr_name]
                else:
                    attr_index = domain.index(attr_name)
                    attr = domain[attr_index]
                    attr_type = vartype(attr)
                    operators = self.Operators[type(attr)]
                opertype, _ = operators[oper_idx]
                if attr_type == 0:
                    # Type 0 only checks that values are defined.
                    filter = data_filter.IsDefined()
                elif attr_type in (2, 4):  # continuous, time
                    try:
                        floats = self._values_to_floats(attr, values)
                    except ValueError as e:
                        # Abort the whole commit; nothing is sent.
                        self.Error.parsing_error(e.args[0])
                        return
                    if floats is None:
                        # No values to compare with; skip this condition.
                        continue
                    filter = data_filter.FilterContinuous(
                        attr_index, opertype, *floats)
                elif attr_type == 3:  # string
                    filter = data_filter.FilterString(
                        attr_index, opertype, *[str(v) for v in values])
                else:
                    # Remaining types are handled as discrete variables.
                    if opertype == FilterDiscreteType.IsDefined:
                        f_values = None
                    else:
                        # An empty or zero first value skips the condition
                        # (presumably 0 means "nothing selected" - confirm).
                        if not values or not values[0]:
                            continue
                        # Stored values are 1-based indices into attr.values.
                        values = [attr.values[i - 1] for i in values]
                        if opertype == FilterDiscreteType.Equal:
                            f_values = {values[0]}
                        elif opertype == FilterDiscreteType.NotEqual:
                            # Every value of the variable except the chosen one.
                            f_values = set(attr.values)
                            f_values.remove(values[0])
                        elif opertype == FilterDiscreteType.In:
                            f_values = set(values)
                        else:
                            raise ValueError("invalid operand")
                    filter = data_filter.FilterDiscrete(attr_index, f_values)
                conditions.append(filter)

            if conditions:
                self.filters = data_filter.Values(conditions)
                matching_output = self.filters(self.data)
                # The same filter object is reused with negation switched on
                # to obtain the rows that do not match.
                self.filters.negate = True
                non_matching_output = self.filters(self.data)

                # Boolean mask over the input rows whose ids appear among
                # the matching rows.
                row_sel = np.in1d(self.data.ids, matching_output.ids)
                annotated_output = create_annotated_table(self.data, row_sel)

            # if hasattr(self.data, "name"):
            #     matching_output.name = self.data.name
            #     non_matching_output.name = self.data.name

            purge_attrs = self.purge_attributes
            purge_classes = self.purge_classes
            # Purging is skipped for SqlTable inputs.
            if (purge_attrs or purge_classes) and \
                    not isinstance(self.data, SqlTable):
                # Each flag is multiplied by its setting, so it contributes
                # nothing to the sum when that setting is off.
                attr_flags = sum([
                    Remove.RemoveConstant * purge_attrs,
                    Remove.RemoveUnusedValues * purge_attrs
                ])
                class_flags = sum([
                    Remove.RemoveConstant * purge_classes,
                    Remove.RemoveUnusedValues * purge_classes
                ])
                # same settings used for attributes and meta features
                remover = Remove(attr_flags, class_flags, attr_flags)

                # NOTE(review): non_matching_output and annotated_output are
                # still None here when there were no conditions; this relies
                # on Remove accepting None - confirm.
                matching_output = remover(matching_output)
                non_matching_output = remover(non_matching_output)
                annotated_output = remover(annotated_output)

        # Empty tables are replaced by None before sending.
        if matching_output is not None and not len(matching_output):
            matching_output = None
        if non_matching_output is not None and not len(non_matching_output):
            non_matching_output = None
        if annotated_output is not None and not len(annotated_output):
            annotated_output = None

        self.Outputs.matching_data.send(matching_output)
        self.Outputs.unmatched_data.send(non_matching_output)
        self.Outputs.annotated_data.send(annotated_output)

        # Brief descriptions of both subsets, stored on the widget.
        self.match_desc = report.describe_data_brief(matching_output)
        self.nonmatch_desc = report.describe_data_brief(non_matching_output)

        summary = matching_output.approx_len() if matching_output else \
            self.info.NoOutput
        details = format_summary_details(
            matching_output) if matching_output else ""
        self.info.set_output_summary(summary, details)
示例#3
0
 def _set_output_summary(self, data: Optional[Table] = None):
     """Update the output summary for ``data``.

     Without (truthy) data, ``self.info.NoOutput`` and empty details are
     shown; otherwise the number of rows and the formatted details.
     """
     if not data:
         self.info.set_output_summary(self.info.NoOutput, "")
         return
     self.info.set_output_summary(len(data), format_summary_details(data))
示例#4
0
 def _set_output_summary(self, output):
     """Show the size and details of ``output`` in the output summary.

     A falsy ``output`` is reported as ``self.info.NoOutput`` with empty
     details.
     """
     if output:
         size, text = len(output), format_summary_details(output)
     else:
         size, text = self.info.NoOutput, ""
     self.info.set_output_summary(size, text)
示例#5
0
 def _set_input_summary(self):
     """Show the size and details of ``self.data`` in the input summary.

     When ``self.data`` is falsy, ``self.info.NoInput`` is shown with empty
     details.
     """
     current = self.data
     if current:
         brief = len(current)
         text = format_summary_details(current)
     else:
         brief = self.info.NoInput
         text = ""
     self.info.set_input_summary(brief, text)
示例#6
0
    def test_output(self):
        """Check the output, the info labels and the status-bar summaries
        while data and template data are sent and removed in turn."""
        # send data and template data
        info = self.widget.info
        no_input, no_output = "No data on input", "No data on output"
        # The slice of every 15th row is the "10 instances" input that the
        # labels and summaries below refer to.
        self.send_signal(self.widget.Inputs.data, self.data[::15])
        self.send_signal(self.widget.Inputs.template_data, self.disc_data)
        output = self.get_output(self.widget.Outputs.transformed_data)
        self.assertTableEqual(output, self.disc_data[::15])
        self.assertEqual("Input data with 10 instances and 4 features.",
                         self.widget.input_label.text())
        self.assertEqual("Template domain applied.",
                         self.widget.template_label.text())
        self.assertEqual("Output data includes 4 features.",
                         self.widget.output_label.text())
        # The input summary lists both inputs: brief sizes joined by a comma,
        # details formatted for the (name, table) pairs.
        data_list = [("Data", self.data[::15]),
                     ("Template data", self.disc_data)]
        summary, details = "10, 150", format_multiple_summaries(data_list)
        self.assertEqual(info._StateInfo__input_summary.brief, summary)
        self.assertEqual(info._StateInfo__input_summary.details, details)
        summary, details = "10", format_summary_details(output)
        self.assertEqual(info._StateInfo__output_summary.brief, summary)
        self.assertEqual(info._StateInfo__output_summary.details, details)

        # remove template data
        self.send_signal(self.widget.Inputs.template_data, None)
        output = self.get_output(self.widget.Outputs.transformed_data)
        self.assertIsNone(output)
        self.assertEqual("Input data with 10 instances and 4 features.",
                         self.widget.input_label.text())
        self.assertEqual("No template data on input.",
                         self.widget.template_label.text())
        self.assertEqual("", self.widget.output_label.text())
        # A missing input is reported as 0 in the brief input summary.
        data_list = [("Data", self.data[::15]), ("Template data", None)]
        summary, details = "10, 0", format_multiple_summaries(data_list)
        self.assertEqual(info._StateInfo__input_summary.brief, summary)
        self.assertEqual(info._StateInfo__input_summary.details, details)
        self.assertEqual(info._StateInfo__output_summary.brief, "-")
        self.assertEqual(info._StateInfo__output_summary.details, no_output)

        # send template data
        self.send_signal(self.widget.Inputs.template_data, self.disc_data)
        output = self.get_output(self.widget.Outputs.transformed_data)
        self.assertTableEqual(output, self.disc_data[::15])
        self.assertEqual("Input data with 10 instances and 4 features.",
                         self.widget.input_label.text())
        self.assertEqual("Template domain applied.",
                         self.widget.template_label.text())
        self.assertEqual("Output data includes 4 features.",
                         self.widget.output_label.text())

        # remove data
        self.send_signal(self.widget.Inputs.data, None)
        output = self.get_output(self.widget.Outputs.transformed_data)
        self.assertIsNone(output)
        self.assertEqual("No data on input.", self.widget.input_label.text())
        self.assertEqual("Template data includes 4 features.",
                         self.widget.template_label.text())
        self.assertEqual("", self.widget.output_label.text())
        data_list = [("Data", None), ("Template data", self.disc_data)]
        summary, details = "0, 150", format_multiple_summaries(data_list)
        self.assertEqual(info._StateInfo__input_summary.brief, summary)
        self.assertEqual(info._StateInfo__input_summary.details, details)
        self.assertEqual(info._StateInfo__output_summary.brief, "-")
        self.assertEqual(info._StateInfo__output_summary.details, no_output)

        # remove template data
        # With both inputs gone, input and output summaries show the
        # "no data" state.
        self.send_signal(self.widget.Inputs.template_data, None)
        self.assertEqual("No data on input.", self.widget.input_label.text())
        self.assertEqual("No template data on input.",
                         self.widget.template_label.text())
        self.assertEqual("", self.widget.output_label.text())
        self.assertEqual(info._StateInfo__input_summary.brief, "-")
        self.assertEqual(info._StateInfo__input_summary.details, no_input)
        self.assertEqual(info._StateInfo__output_summary.brief, "-")
        self.assertEqual(info._StateInfo__output_summary.details, no_output)
示例#7
0
    def test_summary(self):
        """Check if status bar is updated when data is received"""
        info = self.widget.info
        no_input, no_output = "No data on input", "No data on output"

        # First table; the third send_signal argument (1, 2, 3 below) is
        # presumably the id of the input connection - confirm.
        zoo = Table("zoo")
        data_list = [("zoo", zoo)]
        self.send_signal(self.widget.Inputs.data, zoo, 1)
        summary, details = "101", format_multiple_summaries(data_list)
        self.assertEqual(info._StateInfo__input_summary.brief, summary)
        self.assertEqual(info._StateInfo__input_summary.details, details)
        # Nothing is selected yet, so the output summary is empty.
        self.assertEqual(info._StateInfo__output_summary.brief, "")
        self.assertEqual(info._StateInfo__output_summary.details, no_output)
        # After a selection the output summary describes the selected data.
        self._select_data()
        output = self.get_output(self.widget.Outputs.selected_data)
        summary, details = f"{len(output)}", format_summary_details(output)
        self.assertEqual(info._StateInfo__output_summary.brief, summary)
        self.assertEqual(info._StateInfo__output_summary.details, details)

        # Second table: the input summary lists both tables and the output
        # summary is empty again until a new selection is made.
        iris = Table("iris")
        data_list = [("zoo", zoo), ("iris", iris)]
        self.send_signal(self.widget.Inputs.data, iris, 2)
        summary, details = "101, 150", format_multiple_summaries(data_list)
        self.assertEqual(info._StateInfo__input_summary.brief, summary)
        self.assertEqual(info._StateInfo__input_summary.details, details)
        self.assertEqual(info._StateInfo__output_summary.brief, "")
        self.assertEqual(info._StateInfo__output_summary.details, no_output)
        self._select_data()
        output = self.get_output(self.widget.Outputs.selected_data)
        summary, details = f"{len(output)}", format_summary_details(output)
        self.assertEqual(info._StateInfo__output_summary.brief, summary)
        self.assertEqual(info._StateInfo__output_summary.details, details)

        # Third table.
        brown = Table("brown-selected")
        data_list = [("zoo", zoo), ("iris", iris), ("brown-selected", brown)]
        self.send_signal(self.widget.Inputs.data, brown, 3)
        summary, details = "101, 150, 186", format_multiple_summaries(data_list)
        self.assertEqual(info._StateInfo__input_summary.brief, summary)
        self.assertEqual(info._StateInfo__input_summary.details, details)
        self._select_data()
        output = self.get_output(self.widget.Outputs.selected_data)
        summary, details = f"{len(output)}", format_summary_details(output)
        self.assertEqual(info._StateInfo__output_summary.brief, summary)
        self.assertEqual(info._StateInfo__output_summary.details, details)

        # Removing the first table leaves the other two in the summary.
        self.send_signal(self.widget.Inputs.data, None, 1)
        data_list = [("iris", iris), ("brown-selected", brown)]
        summary, details = "150, 186", format_multiple_summaries(data_list)
        self.assertEqual(info._StateInfo__input_summary.brief, summary)
        self.assertEqual(info._StateInfo__input_summary.details, details)
        self.assertEqual(info._StateInfo__output_summary.brief, "")
        self.assertEqual(info._StateInfo__output_summary.details, no_output)
        self._select_data()
        output = self.get_output(self.widget.Outputs.selected_data)
        summary, details = f"{len(output)}", format_summary_details(output)
        self.assertEqual(info._StateInfo__output_summary.brief, summary)
        self.assertEqual(info._StateInfo__output_summary.details, details)

        # Removing the remaining tables empties both summaries.
        self.send_signal(self.widget.Inputs.data, None, 2)
        self.send_signal(self.widget.Inputs.data, None, 3)
        self.assertEqual(info._StateInfo__input_summary.brief, "")
        self.assertEqual(info._StateInfo__input_summary.details, no_input)
        self.assertEqual(info._StateInfo__output_summary.brief, "")
        self.assertEqual(info._StateInfo__output_summary.details, no_output)
示例#8
0
    def __commit_finish(self):
        """Collect the results of the finished task and send the output.

        Reads the result of every future of the current task, builds the
        new attribute and class-variable lists, creates the output table
        without the rows marked for dropping, sends it and updates the
        output summary. ``None`` is sent if a variable fails with an
        unexpected exception or if the table cannot be created.
        """
        # Must run in the thread the widget belongs to, with a task present.
        assert QThread.currentThread() is self.thread()
        assert self.__task is not None
        futures = self.__task.futures
        assert len(futures) == len(self.varmodel)
        assert self.data is not None

        def get_variable(variable, future, drop_mask) \
                -> Optional[List[Orange.data.Variable]]:
            # Returns a (potentially empty) list of variables,
            # or None on failure that should interrupt the imputation
            assert future.done()
            try:
                res = future.result()
            except SparseNotSupported:
                self.Error.model_based_imputer_sparse()
                return []  # None?
            except VariableNotSupported:
                self.Warning.cant_handle_var(variable.name)
                return []
            except Exception:  # pylint: disable=broad-except
                log = logging.getLogger(__name__)
                log.info("Error for %s", variable.name, exc_info=True)
                self.Error.imputation_failed(variable.name)
                return None
            if isinstance(res, RowMask):
                # In-place update: the rows flagged by the mask accumulate
                # in the caller's array; the variable itself is kept as is.
                drop_mask |= res.mask
                newvar = variable
            else:
                newvar = res
            # Normalize a single variable to a one-element list.
            if isinstance(newvar, Orange.data.Variable):
                newvar = [newvar]
            return newvar

        def create_data(attributes, class_vars):
            # Builds the output table over the new domain; metas are taken
            # over from the input. drop_mask is read from the enclosing
            # scope at call time.
            domain = Orange.data.Domain(attributes, class_vars,
                                        self.data.domain.metas)
            try:
                return self.data.from_table(domain, self.data[~drop_mask])
            except Exception:  # pylint: disable=broad-except
                log = logging.getLogger(__name__)
                log.info("Error", exc_info=True)
                self.Error.imputation_failed("Unknown")
                return None

        self.__task = None
        self.setInvalidated(False)
        self.progressBarFinished()

        attributes = []
        class_vars = []
        # One flag per input row; True marks a row to leave out.
        drop_mask = np.zeros(len(self.data), bool)
        for i, (var, fut) in enumerate(zip(self.varmodel, futures)):
            newvar = get_variable(var, fut, drop_mask)
            if newvar is None:
                data = None
                break
            # Results are routed by position: the first
            # len(domain.attributes) entries go to attributes, the rest to
            # class variables (assumes varmodel is ordered that way).
            if i < len(self.data.domain.attributes):
                attributes.extend(newvar)
            else:
                class_vars.extend(newvar)
        else:
            # Reached only when the loop was not interrupted by break.
            data = create_data(attributes, class_vars)

        self.Outputs.data.send(data)
        self.modified = False
        summary = len(data) if data else self.info.NoOutput
        details = format_summary_details(data) if data else ""
        self.info.set_output_summary(summary, details)
示例#9
0
    def test_details(self):
        """Check if details part of the summary is formatted correctly"""
        # Categorical features and target, with a string meta.
        data = Table('zoo')
        n_features = len(data.domain.variables) + len(data.domain.metas)
        details = f'{len(data)} instances, ' \
                  f'{n_features} features\n' \
                  f'Features: {len(data.domain.attributes)} categorical\n' \
                  f'Target: categorical\n' \
                  f'Metas: string (not shown)'
        self.assertEqual(details, format_summary_details(data))

        # Numeric features and target, no metas.
        data = Table('housing')
        n_features = len(data.domain.variables) + len(data.domain.metas)
        details = f'{len(data)} instances, ' \
                  f'{n_features} features\n' \
                  f'Features: {len(data.domain.attributes)} numeric\n' \
                  f'Target: numeric\n' \
                  f'Metas: —'
        self.assertEqual(details, format_summary_details(data))

        # Mixed feature types are listed with per-type counts.
        data = Table('heart_disease')
        n_features = len(data.domain.variables) + len(data.domain.metas)
        details = f'{len(data)} instances, ' \
                  f'{n_features} features\n' \
                  f'Features: {len(data.domain.attributes)} ' \
                  f'(7 categorical, 6 numeric)\n' \
                  f'Target: {len(data.domain.class_vars)} ' \
                  f'(1 categorical, 1 numeric)\n' \
                  f'Metas: —'
        self.assertEqual(details, format_summary_details(data))

        # Several targets and several metas of one type are counted.
        data = make_table([continuous_full, continuous_missing],
                          target=[rgb_full, rgb_missing],
                          metas=[ints_full, ints_missing])
        n_features = len(data.domain.variables) + len(data.domain.metas)
        details = f'{len(data)} instances, ' \
                  f'{n_features} features\n' \
                  f'Features: {len(data.domain.attributes)} numeric\n' \
                  f'Target: {len(data.domain.class_vars)} categorical\n' \
                  f'Metas: {len(data.domain.metas)} categorical'
        self.assertEqual(details, format_summary_details(data))

        # Mixed types among both features and targets; several string metas.
        data = make_table([continuous_full, time_full, ints_full, rgb_missing],
                          target=[rgb_full, continuous_missing],
                          metas=[string_full, string_missing])
        n_features = len(data.domain.variables) + len(data.domain.metas)
        details = f'{len(data)} instances, ' \
                  f'{n_features} features\n' \
                  f'Features: {len(data.domain.attributes)} ' \
                  f'(2 categorical, 1 numeric, 1 time)\n' \
                  f'Target: {len(data.domain.class_vars)} ' \
                  f'(1 categorical, 1 numeric)\n' \
                  f'Metas: {len(data.domain.metas)} string (not shown)'
        self.assertEqual(details, format_summary_details(data))

        # Time features with one categorical target and no metas.
        data = make_table([time_full, time_missing],
                          target=[ints_missing],
                          metas=None)
        details = f'{len(data)} instances, ' \
                  f'{len(data.domain.variables)} features\n' \
                  f'Features: {len(data.domain.attributes)} time\n'\
                  f'Target: categorical\n' \
                  f'Metas: —'
        self.assertEqual(details, format_summary_details(data))

        # Neither target nor metas.
        data = make_table([rgb_full, ints_full], target=None, metas=None)
        details = f'{len(data)} instances, ' \
                  f'{len(data.domain.variables)} features\n' \
                  f'Features: {len(data.domain.variables)} categorical\n' \
                  f'Target: —\n' \
                  f'Metas: —'
        self.assertEqual(details, format_summary_details(data))

        # A single variable: singular "feature", and no count before the type.
        data = make_table([rgb_full], target=None, metas=None)
        details = f'{len(data)} instances, ' \
                  f'{len(data.domain.variables)} feature\n' \
                  f'Features: categorical\n' \
                  f'Target: —\n' \
                  f'Metas: —'
        self.assertEqual(details, format_summary_details(data))

        # No data gives an empty string.
        data = None
        self.assertEqual('', format_summary_details(data))
 def set_data(self, data: Optional[Table]):
     """Store ``data`` on the widget and refresh the input summary.

     A falsy ``data`` is reported as ``self.info.NoInput`` with empty
     details.
     """
     self.data = data
     if data:
         brief = len(data)
         text = format_summary_details(data)
     else:
         brief = self.info.NoInput
         text = ""
     self.info.set_input_summary(brief, text)
示例#11
0
 def _set_input_summary(self, dataset):
     """Show the size and details of ``dataset`` in the input summary.

     A falsy ``dataset`` is reported as ``self.info.NoInput`` with empty
     details.
     """
     if dataset:
         brief, text = len(dataset), format_summary_details(dataset)
     else:
         brief, text = self.info.NoInput, ""
     self.info.set_input_summary(brief, text)
 def _set_summary(data, empty, setter):
     """Pass the summary of ``data`` to ``setter``.

     ``setter`` is called with ``(len(data), formatted details)`` for truthy
     ``data`` and with ``(empty, "")`` otherwise.
     """
     if data:
         brief = len(data)
         text = format_summary_details(data)
     else:
         brief = empty
         text = ""
     setter(brief, text)
示例#13
0
    def set_data(self, data=None):
        """Receive ``data`` and distribute its variables among the role lists.

        Every variable gets a ``(role, position)`` hint: defaults come from
        its place in the domain (attribute, meta or class) and are then
        overridden by ``self.domain_role_hints``. The four variable lists
        are filled according to the hints and the input summary is updated.
        Without data all lists are emptied.
        """
        self.update_domain_role_hints()
        self.closeContext()
        self.data = data

        if data is None:
            self.used_attrs[:] = []
            self.class_attrs[:] = []
            self.meta_attrs[:] = []
            self.available_attrs[:] = []
            self.info.set_input_summary(self.info.NoInput)
            return

        self.openContext(data)
        domain = data.domain
        all_vars = domain.variables + domain.metas

        def signature(var):
            # Variables are identified by their name and type.
            return var.name, vartype(var)

        # Default hints follow the domain; later roles overwrite earlier
        # ones for equal signatures (attribute, then meta, then class).
        hints = {}
        default_roles = (
            ("attribute", domain.attributes),
            ("meta", domain.metas),
            ("class", domain.class_vars),
        )
        for role, variables in default_roles:
            for position, var in enumerate(variables):
                hints[signature(var)] = (role, position)

        # update the hints from context settings
        hints.update(self.domain_role_hints)

        def ordered(role):
            # Variables hinted with `role`, ordered by their hinted position.
            placed = [
                (hints[signature(var)][1], var) for var in all_vars
                if hints[signature(var)][0] == role
            ]
            placed.sort(key=lambda pair: pair[0])
            return [var for _, var in placed]

        attributes = ordered("attribute")
        classes = ordered("class")
        metas = ordered("meta")
        available = ordered("available")

        self.used_attrs[:] = attributes
        self.class_attrs[:] = classes
        self.meta_attrs[:] = metas
        self.available_attrs[:] = available
        self.info.set_input_summary(len(data),
                                    format_summary_details(data))
示例#14
0
 def set_data(self, data):
     """Store ``data``, refresh the input summary and apply unconditionally.

     A falsy ``data`` is reported as ``self.info.NoInput`` with empty
     details.
     """
     self.data = data
     if data:
         brief, text = len(data), format_summary_details(data)
     else:
         brief, text = self.info.NoInput, ""
     self.info.set_input_summary(brief, text)
     self.unconditional_apply()