def apply(self):
    """Build a new class variable from the active rules and send the data.

    Sends ``None`` on the "Data" output when no attribute is selected.
    Otherwise the input table is converted to a domain whose class is a
    discrete variable computed from the rule patterns; the former class
    variables are appended to the metas.
    """
    if not self.attribute:
        self.send("Data", None)
        return

    source_domain = self.data.domain
    rule_list = self.active_rules

    # A rule is kept when it has a label, a pattern or at least one match.
    keep_flags = [
        label or pattern or count
        for (label, pattern), count in zip(rule_list, self.match_counts)
    ]
    kept_patterns = [
        pattern for (_, pattern), keep in zip(rule_list, keep_flags) if keep
    ]
    kept_names = [
        name for name, keep in zip(self.class_labels(), keep_flags) if keep
    ]

    # The transformer class is chosen by the type of the source attribute.
    make_transformer = self.TRANSFORMERS[type(self.attribute)]
    value_computer = make_transformer(
        self.attribute, kept_patterns, self.case_sensitive,
        self.match_beginning)
    created_class = DiscreteVariable(
        self.class_name, kept_names, compute_value=value_computer)

    output_domain = Domain(
        source_domain.attributes,
        created_class,
        source_domain.metas + source_domain.class_vars)
    self.send("Data", Table(output_domain, self.data))
示例#2
0
    def apply_domain_edit(self):
        """Apply the domain editor's changes and send the resulting table.

        Clears the performance and renamed-variable warnings, builds the
        output table (``None`` when there is no input or the edited domain
        has neither variables nor metas), updates the output summary, sends
        the table and disables the apply button.
        """
        warning = self.Warning
        warning.performance_warning.clear()
        warning.renamed_vars.clear()

        table = None
        source = self.data
        if source is not None:
            domain, cols, renamed = self.domain_editor.get_domain(
                source.domain, source, deduplicate=True)
            has_columns = domain.variables or domain.metas
            if has_columns and domain is source.domain:
                # Nothing was edited: pass the input through unchanged.
                table = source
            elif has_columns:
                X, y, m = cols
                table = Table.from_numpy(domain, X, y, m, source.W)
                # Carry over the name, row ids and attributes of the input.
                table.name = source.name
                table.ids = np.array(source.ids)
                table.attributes = getattr(source, 'attributes', {})
                self._inspect_discrete_variables(domain)
            if renamed:
                warning.renamed_vars(f"Renamed: {', '.join(renamed)}")

        if table:
            summary = len(table)
            details = format_summary_details(table)
        else:
            summary = self.info.NoOutput
            details = ""
        self.info.set_output_summary(summary, details)
        self.Outputs.data.send(table)
        self.apply_button.setEnabled(False)
示例#3
0
    def apply_domain_edit(self):
        """Apply the domain editor's changes and send the resulting table.

        Sends ``None`` when there is no input data or when the edited domain
        has neither variables nor metas. Otherwise a new table is built from
        the edited columns. The apply button is disabled afterwards.
        """
        table = None
        if self.data is not None:
            domain, cols = self.domain_editor.get_domain(
                self.data.domain, self.data)
            if domain.variables or domain.metas:
                # X holds the attribute columns, y the class variables and
                # m the metas.
                X, y, m = cols
                table = Table.from_numpy(domain, X, y, m, self.data.W)
                # Carry over the name, row ids and attributes of the input;
                # attributes fall back to an empty dict when absent.
                table.name = self.data.name
                table.ids = np.array(self.data.ids)
                table.attributes = getattr(self.data, 'attributes', {})
        self.Outputs.data.send(table)
        self.apply_button.setEnabled(False)
示例#4
0
    def load_url(self, from_reload=False):
        """Load a table from the URL currently shown in the combo box.

        Does nothing when the combo text is empty. On failure the error is
        logged and shown and ``self.table`` is reset to ``None``. The result
        is committed unless this is a reload and the checksum is unchanged.
        """
        url = self.combo.currentText()
        if not url:
            return

        previous = self.table
        try:
            with self.progressBar(3) as progress:
                progress.advance()
                loaded = Table.from_url(url)
                progress.advance()
        except Exception as exc:
            log.exception("Couldn't load data from: %s", url)
            message = try_(lambda: exc.args[0], '')
            self.Error.error(message)
            self.table = None
        else:
            self.Error.clear()
            self.table = loaded
            self.combo.setTitleFor(self.combo.currentIndex(), loaded.name)
        self.set_info()

        def _checksum(data):
            # NaN is the fallback value; NaN never compares equal to itself,
            # so two fallbacks are always treated as different.
            return try_(lambda: data.checksum(), float('nan'))

        unchanged = from_reload and _checksum(previous) == _checksum(self.table)
        if not unchanged:
            self.commit()
 def setUp(self) -> None:
     """Create the widget, feed it the toy dataset and check its combos."""
     datasets_dir = os.path.join(os.path.dirname(__file__), 'datasets')
     self.widget = self.create_widget(OWAsSurvivalData)
     toy_table = Table(f'{datasets_dir}/toy_example.tab')
     self.send_signal(self.widget.Inputs.data, toy_table)

     controls = self.widget.controls
     self.assertEqual(controls.time_var.count(), 1)
     self.assertEqual(controls.event_var.count(), 2)
示例#6
0
 def test_report_widgets_visualize(self):
     """Create a report for each of the ten visualization widgets."""
     application = QApplication(sys.argv)
     report = OWReport.get_instance()
     zoo = Table("zoo")
     visu = self.visu_widgets
     self.assertEqual(len(visu), 10)
     self._create_report(visu, report, zoo, application)
示例#7
0
    def __init__(self, parent=None, signalManager=None, settings=None):
        """Set up the undo stack, plot, class-label model and empty table."""
        super().__init__(parent, signalManager, settings)

        # Set before the plot is created; the real table is built below.
        self.data = None
        self.undoStack = QtGui.QUndoStack(self)
        self.plot = PaintDataPlot(self.mainArea, "Painted Plot", widget=self)

        item_flags = (QtCore.Qt.ItemIsSelectable
                      | QtCore.Qt.ItemIsEnabled
                      | QtCore.Qt.ItemIsEditable)
        self.classValuesModel = ColoredListModel(
            ["Class-1", "Class-2"], self, self.plot, flags=item_flags)
        self.classValuesModel.dataChanged.connect(self.classNameChange)

        # Two continuous attributes plus a class whose values are the model.
        features = [
            ContinuousVariable(self.attr1),
            ContinuousVariable(self.attr2),
        ]
        target = DiscreteVariable("Class", values=self.classValuesModel)
        self.data = Table(Domain(features, target))

        self.toolsStackCache = {}

        self.initUI()
        self.initPlot()
        self.updatePlot()
示例#8
0
 def test_report_widgets_unsupervised_dist(self):
     """Create a report for both distance-based widgets."""
     report = OWReport.get_instance()
     distances = Euclidean(Table("zoo"))
     dist_widgets = self.dist_widgets
     self.assertEqual(len(dist_widgets), 2)
     self._create_report(dist_widgets, report, distances)
示例#9
0
    def apply_domain_edit(self):
        """Apply the domain editor's changes and send the resulting table.

        Clears the performance and renamed-variable warnings, builds the
        output table (``None`` when there is no input or the edited domain
        is empty), shows the multiple-targets warning when the output has
        more than one class variable, sends the table and disables the
        apply button.
        """
        self.Warning.performance_warning.clear()
        self.Warning.renamed_vars.clear()

        data = self.data
        table = None
        if data is not None:
            domain, cols, renamed = self.domain_editor.get_domain(
                data.domain, data, deduplicate=True)
            if domain.variables or domain.metas:
                if domain is data.domain:
                    # Nothing was edited: reuse the input table.
                    table = data
                else:
                    X, y, m = cols
                    table = Table.from_numpy(domain, X, y, m, data.W)
                    table.name = data.name
                    table.ids = np.array(data.ids)
                    table.attributes = getattr(data, 'attributes', {})
                    self._inspect_discrete_variables(domain)
            if renamed:
                self.Warning.renamed_vars(f"Renamed: {', '.join(renamed)}")

        several_targets = \
            table is not None and len(table.domain.class_vars) > 1
        self.Warning.multiple_targets(shown=several_targets)
        self.Outputs.data.send(table)
        self.apply_button.setEnabled(False)
示例#10
0
    def load_url(self, from_reload=False):
        """Fetch the table at the combo box's URL and commit it if changed.

        Returns immediately when the combo text is empty. A failed load is
        logged, reported through ``self.Error`` and leaves ``self.table`` as
        ``None``. With ``from_reload`` set, the commit is skipped when the old
        and new checksums compare equal.
        """
        address = self.combo.currentText()
        if not address:
            return

        old_table = self.table
        try:
            with self.progressBar(3) as bar:
                bar.advance()
                new_table = Table.from_url(address)
                bar.advance()
        except Exception as err:
            log.exception("Couldn't load data from: %s", address)
            self.Error.error(try_(lambda: err.args[0], ''))
            self.table = None
        else:
            self.Error.clear()
            self.table = new_table
            self.combo.setTitleFor(self.combo.currentIndex(), new_table.name)
        self.set_info()

        if from_reload:
            # NaN is the fallback for a failed checksum; NaN != NaN, so two
            # fallbacks never count as "unchanged".
            nan = float('nan')
            old_sum = try_(lambda: old_table.checksum(), nan)
            new_sum = try_(lambda: self.table.checksum(), nan)
            if old_sum == new_sum:
                return
        self.commit()
示例#11
0
 def test_incorrect_input_data(self):
     """The toy table must raise the error and leave the widget empty."""
     widget = self.widget
     toy_table = Table(f'{self.test_data_path}/toy_example.tab')
     self.send_signal(widget.Inputs.data, toy_table)

     self.assertTrue(widget.Error.missing_survival_data.is_shown())
     for attribute in ('data', 'time_var', 'event_var'):
         self.assertIsNone(getattr(widget, attribute))
示例#12
0
    def setUp(self) -> None:
        """Pipe the toy dataset through As Survival Data into Kaplan-Meier."""
        self.test_data_path = os.path.join(
            os.path.dirname(__file__), 'datasets')

        # Both widgets are needed: one prepares the data, one is under test.
        self.as_survival = self.create_widget(OWAsSurvivalData)
        self.widget = self.create_widget(OWKaplanMeier)

        # Load the table and choose the time and event columns.
        survival = self.as_survival
        toy_table = Table(f'{self.test_data_path}/toy_example.tab')
        self.send_signal(survival.Inputs.data, toy_table)
        simulate.combobox_activate_item(
            survival.controls.time_var, survival._data.columns.Time.name)
        simulate.combobox_activate_item(
            survival.controls.event_var, survival._data.columns.Event.name)
        self.send_signal(
            self.widget.Inputs.data, self.get_output(survival.Outputs.data))

        # The tested widget must see both endpoints as class variables.
        received_domain = self.widget.data.domain
        time_var, event_var = get_survival_endpoints(received_domain)
        self.assertEqual(time_var.name, 'Time')
        self.assertEqual(event_var.name, 'Event')
        class_vars = self.widget.data.domain.class_vars
        self.assertIn(time_var, class_vars)
        self.assertIn(event_var, class_vars)

        # The toy data contains missing values; the widget must warn.
        self.assertTrue(self.widget.Warning.missing_values_detected.is_shown())

        self.widget.auto_commit = True

        # Without an explicit size, ViewBox.mapSceneToView fails with
        # numpy.linalg.LinAlgError: Singular matrix
        view_box = self.widget.graph.getViewBox()
        view_box.resize(200, 200)
示例#13
0
def test_main():
    """Show the data sampler on the iris data and run the event loop."""
    application = QtGui.QApplication([])
    iris = Table("iris")
    sampler = OWDataSampler()
    sampler.set_data(iris)
    sampler.show()
    exit_code = application.exec_()
    return exit_code
示例#14
0
    def get_all_topics_table(self):
        """Return a table with one row per topic and one column per word.

        The columns are the alphabetically sorted words of the first topic.
        The metas hold the topic name and its marginal probability. The table
        is named 'All topics'.
        """
        words_per_topic = self._topics_words(self.n_words)
        weights_per_topic = self._topics_weights(self.n_words)
        column_words = sorted(words_per_topic[0])
        topic_count = len(words_per_topic)

        # Order each topic's weights by the alphabetical order of its words.
        rows = [
            [weight for _, weight in sorted(zip(topic_words, topic_weights))]
            for topic_words, topic_weights
            in zip(words_per_topic, weights_per_topic)
        ]
        X = np.array(rows)

        # Keep only as many names as there are topics: gensim may return
        # fewer topics than requested when the rank is lower than num_topics.
        topic_labels = np.array(self.topic_names[:topic_count], dtype=object)
        topic_labels = topic_labels[:, None]

        word_vars = [ContinuousVariable(word) for word in column_words]
        meta_vars = [
            StringVariable('Topics'),
            ContinuousVariable('Marginal Topic Probability'),
        ]

        marginal = self._marginal_probability(self.tokens, self.doc_topic)
        marginal = np.array(marginal, dtype=object)

        result = Table.from_numpy(
            Domain(word_vars, metas=meta_vars),
            X=X,
            metas=np.hstack((topic_labels, marginal)))
        result.name = 'All topics'
        return result
示例#15
0
 def commit(self):
     """Send the continuized data, or ``None`` when there is no input."""
     continuizer = self.constructContinuizer()
     if self.data is None:
         self.send("Data", None)
         return
     continuized_domain = continuizer(self.data)
     self.send("Data", Table.from_table(continuized_domain, self.data))
示例#16
0
def test_main():
    """Show the data table widget with three datasets and run the event loop."""
    application = QtGui.QApplication(sys.argv)
    viewer = OWDataTable()

    datasets = [Table(name) for name in ("iris", "brown-selected", "housing")]
    viewer.show()
    viewer.raise_()

    for dataset in datasets:
        viewer.set_dataset(dataset, dataset.name)

    # Settings are not saved on exit.
    return application.exec()
示例#17
0
 def sendData(self):
     """Send the data, dropping the class when it has a single value."""
     output = self.data
     class_names = {str(example.get_class()) for example in output}
     if len(class_names) == 1:
         # A class with one value carries no information: remove it.
         output = Table(Domain(output.domain.attributes), output)
     self.send("Data", output)
示例#18
0
 def commit(self):
     """Continuize the input table and send it; send ``None`` without input."""
     make_domain = self.constructContinuizer()
     output = None
     if self.data is not None:
         output = Table.from_table(make_domain(self.data), self.data)
     self.send("Data", output)
示例#19
0
    def set_tree(self, model=None):
        """Replace the displayed tree with `model` and refresh the widget.

        The widget is cleared first. When `model` is ``None`` nothing else
        happens. Otherwise the tree type, instances, adapter and color
        palette are set, the Pythagorean tree is redrawn and the legend, info
        box, depth slider, target-class combo and main area are updated.
        Optional ``meta_*`` attributes on the model override the size
        calculation, log scale and target class.

        :param model: the tree model to display, or ``None`` to only clear
        """
        self.clear()
        self.model = model

        if model is not None:
            # We need to know what kind of tree we have in order to properly
            # show colors and tooltips
            if isinstance(model, TreeClassifier):
                self.tree_type = self.CLASSIFICATION
            elif isinstance(model, TreeRegressor):
                self.tree_type = self.REGRESSION
            else:
                self.tree_type = self.GENERAL

            self.instances = model.instances
            # this bit is important for the regression classifier
            # When the instances use a different domain than the model, they
            # are converted to the model's domain; otherwise used as they are.
            if self.instances is not None and \
                    self.instances.domain != model.domain:
                self.clf_dataset = Table.from_table(
                    self.model.domain, self.instances)
            else:
                self.clf_dataset = self.instances

            self.tree_adapter = self._get_tree_adapter(self.model)
            # `_tree_specific` returns a callable looked up by name, which is
            # then called immediately.
            self.color_palette = self._tree_specific('_get_color_palette')()

            # Redraw the tree and install the tooltip and coloring callbacks.
            self.ptree.clear()
            self.ptree.set_tree(self.tree_adapter)
            self.ptree.set_tooltip_func(self._tree_specific('_get_tooltip'))
            self.ptree.set_node_color_func(
                self._tree_specific('_get_node_color')
            )

            self._tree_specific('_update_legend_colors')()
            self._update_legend_visibility()

            self._update_info_box()
            self._update_depth_slider()

            self._tree_specific('_update_target_class_combo')()

            self._update_main_area()

            # Get meta variables describing pythagoras tree if given from
            # forest.
            if hasattr(model, 'meta_size_calc_idx'):
                self.size_calc_idx = model.meta_size_calc_idx
            if hasattr(model, 'meta_size_log_scale'):
                self.size_log_scale = model.meta_size_log_scale
            # Updating the size calc redraws the whole tree
            if hasattr(model, 'meta_size_calc_idx') or \
                    hasattr(model, 'meta_size_log_scale'):
                self.update_size_calc()
            # The target class can also be passed from the meta properties
            if hasattr(model, 'meta_target_class_index'):
                self.target_class_index = model.meta_target_class_index
                self.update_colors()
示例#20
0
 def sendData(self):
     """Send the continuized data and mark the widget as up to date.

     Sends ``None`` on the "Data" output when there is no input data.
     ``data_changed`` is reset to ``False`` in both cases.
     """
     continuizer = self.constructContinuizer()
     if self.data is not None:
         domain = continuizer(self.data)
         data = Table.from_table(domain, self.data)
     else:
         # Must go through `send`: calling `sendData` here with arguments
         # would raise TypeError instead of clearing the output.
         data = None
     self.send("Data", data)
     self.data_changed = False
示例#21
0
 def test_report_widgets_visualize(self):
     """Create a report for each visualization widget.

     OrangeDeprecationWarning is ignored for the duration of the test. The
     ``with`` block restores the previous warning filters even when report
     creation raises.
     """
     with warnings.catch_warnings():
         warnings.simplefilter("ignore", OrangeDeprecationWarning)
         rep = OWReport.get_instance()
         data = Table("zoo")
         widgets = self.visu_widgets
         self._create_report(widgets, rep, data)
示例#22
0
    def test_z_score(self):
        """ZScore must standardize a 2x2 table along either axis."""
        variables = [ContinuousVariable(name) for name in ('A', 'B')]
        table = Table.from_list(Domain(variables), [[1, 2], [3, 4]])

        expected_by_axis = (
            (0, [[-1, -1], [1, 1]]),
            (1, [[-1, 1], [-1, 1]]),
        )
        for axis, expected in expected_by_axis:
            normalized = ZScore(axis=axis)(table)
            np.testing.assert_array_almost_equal(expected, normalized.X)
示例#23
0
 def sendData(self):
     """Send the continuized data and reset the ``data_changed`` flag.

     When there is no input data, ``None`` is sent on the "Data" output.
     """
     continuizer = self.constructContinuizer()
     output = None
     if self.data is not None:
         output = Table.from_table(continuizer(self.data), self.data)
     # The no-data case must use `send`: `sendData` takes no arguments, so
     # calling it with ("Data", None) raises TypeError.
     self.send("Data", output)
     self.data_changed = False
示例#24
0
    def test_quantile_normalization(self):
        """QuantileNormalization must reproduce the known 3x4 example."""
        variables = [ContinuousVariable(name) for name in ('A', 'B', 'C', 'D')]
        rows = [
            [5, 2, 3, 4],
            [4, 1, 4, 2],
            [3, 4, 6, 8],
        ]
        table = Table.from_list(Domain(variables), rows)
        normalized = QuantileNormalization()(table)

        expected = np.array([
            [5.66666667, 2, 3, 4.66666667],
            [5.166667, 2, 5.166667, 3],
            [2, 3, 4.66666667, 5.66666667],
        ])
        np.testing.assert_array_almost_equal(expected, normalized.X)
示例#25
0
    def redo(self):
        """Add the new class label and rebuild the widget's data around it."""
        widget = self.widget
        labels = self.classValuesModel
        labels.append(self.newClassLabel)

        features = [ContinuousVariable(widget.attr1),
                    ContinuousVariable(widget.attr2)]
        newdomain = Domain(
            features, DiscreteVariable("Class", values=labels))
        newdata = Table(newdomain)

        # Copy every existing example into the new domain.
        copies = []
        for ex in self.data:
            values = [float(ex[a]) for a in ex.domain.attributes]
            values.append(str(ex.get_class()))
            copies.append(Instance(newdomain, values))
        newdata.extend(copies)
        widget.data = newdata

        widget.removeClassLabel.setEnabled(len(labels) > 1)
        # Select the label that was just appended (the last one).
        last_index = labels.index(len(labels) - 1)
        selection = widget.classValuesView.selectionModel()
        selection.select(last_index, QtGui.QItemSelectionModel.ClearAndSelect)
        widget.updatePlot()
        widget.updateCursor()
def upload(filename):
    """Download a remote dataset, re-save it locally and upload it.

    Reads ``<filename>.info`` (JSON) from ``URL_REMOTE`` to build the
    annotations, loads the table itself from ``URL_REMOTE``, saves it under
    the local name (``.tab.gz`` replaced by ``.pickle``), runs the
    'data-table-upload' process on it and stores the annotations as the
    dataset descriptor. The local file is removed afterwards, even when the
    upload fails.

    :param filename: name of the dataset file below ``URL_REMOTE``
    """
    # URLs always use '/', so join with posixpath rather than os.path, whose
    # separator depends on the platform.
    import posixpath

    info_url = posixpath.join(URL_REMOTE, filename + '.info')
    with urllib.request.urlopen(info_url) as response:
        info = json.loads(response.read().decode())

    annotations = {
        'tabular': {
            'title': info['title'],
            'cells': info['instances'],
            'genes': info['num_of_genes'],
            'tax_id': info['taxid'],
            # A falsy target is stored as an empty string.
            'target': info['target'] or '',
            'tags': ', '.join(info['tags']),
        },
        'other': {
            'description': info['description'],
            'references': ' | '.join(info['references']),
            'source': info['source'],
            'collection': info['collection'],
            'year': info['year'],
            'instances': info['instances'],
            'variables': info['variables'],
        },
    }

    table = Table(posixpath.join(URL_REMOTE, filename))

    # str.replace leaves the name unchanged when '.tab.gz' is absent.
    local_name = filename.replace('.tab.gz', '.pickle')
    table.save(local_name)

    try:
        dataset = res.run('data-table-upload', input={'src': local_name})

        annotations['tabular']['file_name'] = local_name
        annotations['tabular']['file_size'] = os.stat(local_name).st_size

        # descriptor schema slug
        dataset.descriptor_schema = 'data_info'

        dataset.descriptor = annotations
        dataset.save()
    finally:
        # Clean up the temporary local copy whether or not the upload worked.
        os.remove(local_name)
示例#27
0
    def redo(self):
        """Remove a class label and drop the examples that carried it."""
        widget = self.widget
        labels = self.classValuesModel
        self.label = labels.pop(self.index)

        survivors = [ex for ex in self.data
                     if str(ex.get_class()) != self.label]

        features = [ContinuousVariable(widget.attr1),
                    ContinuousVariable(widget.attr2)]
        newdomain = Domain(
            features, DiscreteVariable("Class", values=labels))
        newdata = Table(newdomain)

        for ex in survivors:
            class_name = str(ex.get_class())
            # Skip the removed label and any label no longer in the model.
            if class_name == self.label or class_name not in labels:
                continue
            values = [float(ex[a]) for a in ex.domain.attributes]
            newdata.append(Instance(newdomain, values + [class_name]))

        widget.data = newdata
        widget.updatePlot()
        widget.updateCursor()
示例#28
0
 def redo(self):
     """Rebuild the widget's data after a class label was renamed.

     Every example is copied into a new domain built from the current class
     labels. An example whose class is no longer among the labels is given
     ``self.changedLabel``; its previous class name is remembered in
     ``self.oldLabelName``.
     """
     newdomain = Domain([ContinuousVariable(self.widget.attr1),
                         ContinuousVariable(self.widget.attr2)],
                        DiscreteVariable("Class",
                                         values=self.classValuesModel))
     newdata = Table(newdomain)
     for ex in self.data:
         label = str(ex.get_class())
         if label not in self.classValuesModel:
             # This example still carries the old name: relabel it.
             self.oldLabelName = label
             label = self.changedLabel
         newdata.append(
             Instance(newdomain,
                      [float(ex[a]) for a in ex.domain.attributes] +
                      [label]))
     self.widget.data = newdata
     self.widget.updatePlot()
示例#29
0
    def redo(self):
        """Pop the selected class label and rebuild the data without it."""
        self.label = self.classValuesModel.pop(self.index)
        removed = self.label
        remaining = [ex for ex in self.data if str(ex.get_class()) != removed]

        class_var = DiscreteVariable("Class", values=self.classValuesModel)
        newdomain = Domain(
            [ContinuousVariable(self.widget.attr1),
             ContinuousVariable(self.widget.attr2)],
            class_var)
        newdata = Table(newdomain)

        for ex in remaining:
            name = str(ex.get_class())
            # Keep only examples whose class is still a known label.
            if name != self.label and name in self.classValuesModel:
                row = [float(ex[a]) for a in ex.domain.attributes]
                row.append(name)
                newdata.append(Instance(newdomain, row))

        self.widget.data = newdata
        self.widget.updatePlot()
        self.widget.updateCursor()
示例#30
0
def main():  # pragma: no cover
    """Show the Create Class widget on the zoo data for manual inspection."""
    import sys
    from AnyQt.QtWidgets import QApplication

    application = QApplication(sys.argv)
    zoo = Table("zoo")
    widget = OWCreateClass()
    widget.show()
    widget.set_data(zoo)
    application.exec()
    # Persist the settings once the event loop has finished.
    widget.saveSettings()
示例#31
0
    def set_tree(self, model=None):
        """Replace the displayed tree with `model` and refresh the widget.

        The widget is cleared first. For a non-``None`` model the instances,
        tree adapter and Pythagorean tree are set up, and the depth slider,
        legend, info box, target-class combo and main area are updated.
        Optional ``meta_*`` attributes on the model override the target class
        and size calculation. The annotated-data output is always sent, also
        when `model` is ``None``.

        :param model: the tree model to display, or ``None`` to only clear
        """
        self.clear()
        self.model = model

        if model is not None:
            self.instances = model.instances
            # this bit is important for the regression classifier
            # When the instances use a different domain than the model, they
            # are converted to the model's domain; otherwise used as they are.
            if self.instances is not None and \
                    self.instances.domain != model.domain:
                self.clf_dataset = Table.from_table(self.model.domain,
                                                    self.instances)
            else:
                self.clf_dataset = self.instances

            self.tree_adapter = self._get_tree_adapter(self.model)
            self.ptree.clear()

            # The size weighting is taken from the SIZE_CALCULATION entry
            # selected by `size_calc_idx`.
            self.ptree.set_tree(
                self.tree_adapter,
                weight_adjustment=self.SIZE_CALCULATION[self.size_calc_idx][1],
                target_class_index=self.target_class_index,
            )

            self._update_depth_slider()
            # The palette is read from the tree root, so it is only
            # available after `set_tree` above.
            self.color_palette = self.ptree.root.color_palette
            self._update_legend_colors()
            self._update_legend_visibility()
            self._update_info_box()
            self._update_target_class_combo()

            self._update_main_area()

            # The target class can also be passed from the meta properties
            # This must be set after `_update_target_class_combo`
            if hasattr(model, 'meta_target_class_index'):
                self.target_class_index = model.meta_target_class_index
                self.update_colors()

            # Get meta variables describing what the settings should look like
            # if the tree is passed from the Pythagorean forest widget.
            if hasattr(model, 'meta_size_calc_idx'):
                self.size_calc_idx = model.meta_size_calc_idx
                self.update_size_calc()

            # TODO There is still something wrong with this
            # if hasattr(model, 'meta_depth_limit'):
            #     self.depth_limit = model.meta_depth_limit
            #     self.update_depth()

        # Sent for every call. NOTE(review): with model=None this reads
        # self.instances without assigning it here; presumably self.clear()
        # resets it. Confirm.
        self.send(ANNOTATED_DATA_SIGNAL_NAME,
                  create_annotated_table(self.instances, None))
示例#32
0
    def redo(self):
        """Append the new class label and move all examples to the new domain."""
        self.classValuesModel.append(self.newClassLabel)
        class_var = DiscreteVariable("Class", values=self.classValuesModel)
        newdomain = Domain(
            [ContinuousVariable(self.widget.attr1),
             ContinuousVariable(self.widget.attr2)],
            class_var)
        newdata = Table(newdomain)

        def _converted(example):
            # Attribute values as floats followed by the class name.
            row = [float(example[a]) for a in example.domain.attributes]
            return Instance(newdomain, row + [str(example.get_class())])

        newdata.extend([_converted(ex) for ex in self.data])
        self.widget.data = newdata

        label_count = len(self.classValuesModel)
        self.widget.removeClassLabel.setEnabled(label_count > 1)
        # Select the label that was appended above (the last one).
        appended = self.classValuesModel.index(len(self.classValuesModel) - 1)
        self.widget.classValuesView.selectionModel().select(
            appended, QtGui.QItemSelectionModel.ClearAndSelect)
        self.widget.updatePlot()
        self.widget.updateCursor()
示例#33
0
    def test_report_widgets_regression(self):
        """Report a regression tree widget, then all regression widgets."""
        report = OWReport.get_instance()
        housing = Table("housing")
        regression_widgets = self.regr_widgets

        # The tree graph widget takes a model rather than data.
        tree_widget = self.create_widget(OWTreeGraph)
        model = RegressionTreeLearner(max_depth=3)(housing)
        model.instances = housing
        tree_widget.ctree(model)
        tree_widget.create_report_html()
        report.make_report(tree_widget)

        self._create_report(regression_widgets, report, housing)
示例#34
0
    def test_report_widgets_model(self):
        """Report a classification tree widget, then all model widgets."""
        report = OWReport.get_instance()
        titanic = Table("titanic")
        model_widgets = self.model_widgets

        # The tree graph widget takes a model rather than data.
        tree_widget = self.create_widget(OWTreeGraph)
        classifier = TreeLearner(max_depth=3)(titanic)
        classifier.instances = titanic
        tree_widget.ctree(classifier)
        tree_widget.create_report_html()
        report.make_report(tree_widget)

        self._create_report(model_widgets, report, titanic)
示例#35
0
    def commit(self):
        """Embed the SMILES column and send embedded and skipped rows.

        Does nothing when no embedder or no SMILES attribute is selected.
        Rows that were embedded go to the ``embedded_smiles`` output with the
        fingerprint as attributes. The remaining rows go to
        ``skipped_smiles`` unchanged and in their original order. An output
        with no rows receives ``None``.
        """
        if self._embedder == '' or self._smiles_attr == '':
            return

        smiles = self.data[:, self._smiles_attr].metas.flatten()
        embedded, valid = self.to_fingerprints(smiles, self._embedder)
        # Build the complement in row order; iterating a set difference
        # would not guarantee any particular order.
        valid_rows = set(valid)
        invalid = [row for row in range(len(smiles)) if row not in valid_rows]

        # len() works for lists as well as other sequences, unlike `== []`.
        if len(valid):
            if self._embedder == MACCS:
                attributes = [
                    ContinuousVariable.make(name)
                    for name, _ in Chem.MACCSkeys.smartsPatts.values()]
                attributes.append(ContinuousVariable.make('?'))
            else:
                attributes = [ContinuousVariable.make("C_{}".format(x))
                              for x in range(embedded.shape[1])]
            embedded_table = Table.from_numpy(
                Domain(attributes,
                       self.data.domain.class_vars,
                       self.data.domain.metas),
                embedded,
                self.data.Y[valid],
                self.data.metas[valid],
                self.data.W[valid]
            )
            self.Outputs.embedded_smiles.send(embedded_table)
        else:
            self.Outputs.embedded_smiles.send(None)

        if invalid:
            invalid_table = Table.from_numpy(
                self.data.domain,
                self.data.X[invalid],
                self.data.Y[invalid],
                self.data.metas[invalid],
                self.data.W[invalid]
            )
            self.Outputs.skipped_smiles.send(invalid_table)
        else:
            self.Outputs.skipped_smiles.send(None)
示例#36
0
    def apply_domain_edit(self):
        """Build a table from the edited variables and send it.

        Walks the editor model's variables in parallel with the original
        attributes, class variables and metas (in that order), converts each
        column to the edited type where needed, assembles the new domain and
        arrays, sends the table on "Data" and disables the apply button.
        """
        attributes = []
        class_vars = []
        metas = []
        # `place` from the editor indexes into these lists: 0, 1 or 2.
        places = [attributes, class_vars, metas]
        X, y, m = [], [], []
        cols = [X, y, m]  # Xcols, Ycols, Mcols

        def is_missing(x):
            """Return True when `x` prints as "nan" or as an empty string."""
            return str(x) in ("nan", "")

        # Each original variable is tagged with the role it had in the input
        # domain: 0 for attributes, 1 for class variables, 2 for metas.
        for column, (name, tpe, place, vals, is_con), (orig_var, orig_plc) in \
            zip(count(), self.editor_model.variables,
                chain([(at, 0) for at in self.data.domain.attributes],
                      [(cl, 1) for cl in self.data.domain.class_vars],
                      [(mt, 2) for mt in self.data.domain.metas])):
            # Place 3 has no target list; presumably the editor's "skip"
            # role. TODO confirm against the editor model.
            if place == 3:
                continue
            # Metas are read from the `.metas` part of the column slice.
            if orig_plc == 2:
                col_data = list(chain(*self.data[:, orig_var].metas))
            else:
                col_data = list(chain(*self.data[:, orig_var]))
            if name == orig_var.name and tpe == type(orig_var):
                # Name and type are unchanged: reuse the original variable.
                var = orig_var
            elif tpe == DiscreteVariable:
                # Values are the distinct non-missing entries as strings;
                # the column is re-encoded as indices into that list.
                values = list(
                    str(i) for i in set(col_data) if not is_missing(i))
                var = tpe(name, values)
                col_data = [
                    np.nan if is_missing(x) else values.index(str(x))
                    for x in col_data
                ]
            elif tpe == StringVariable and type(orig_var) == DiscreteVariable:
                # Discrete to string: use the value's text, "" for NaN.
                var = tpe(name)
                col_data = [
                    orig_var.repr_val(x) if not np.isnan(x) else ""
                    for x in col_data
                ]
            else:
                var = tpe(name)
            places[place].append(var)
            cols[place].append(col_data)
        domain = Domain(attributes, class_vars, metas)
        # Columns were collected one list per variable, hence the transposes.
        X = np.array(X).T if len(X) else np.empty((len(self.data), 0))
        y = np.array(y).T if len(y) else None
        # Metas use dtype object when any of them is a StringVariable.
        dtpe = object if any(
            isinstance(m, StringVariable) for m in domain.metas) else float
        m = np.array(m, dtype=dtpe).T if len(m) else None
        table = Table.from_numpy(domain, X, y, m, self.data.W)
        self.send("Data", table)
        self.apply_button.setEnabled(False)
示例#37
0
    def from_table(cls, domain, source, row_indices=...):
        """
        Create a new table from selected columns and/or rows of an existing
        one. The columns are chosen using a domain. The domain may also include
        variables that do not appear in the source table; they are computed
        from source variables if possible.

        The resulting data is
        - a new LazyTable if source is a LazyTable and every variable of
          domain is in the source domain. In this case row_indices is not
          used. This should ensure that the SelectAttributes widget works.
        - a normal Table otherwise, which could apparently be a view or a
          copy of the existing data. However, what happens with a view of
          growing data is unknown.

        :param domain: the domain for the new table
        :type domain: Orange.data.Domain
        :param source: the source table
        :type source: Orange.data.Table
        :param row_indices: indices of the rows to include
        :type row_indices: a slice or a sequence
        :return: a new table
        :rtype: Orange.data.Table
        """
        # TODO: Improve the laziness support for other cases?
        # TODO: Investigate this computing of new variables.
        # True when every variable of the requested domain is in the source.
        subdomain = all(v in source.domain for v in domain)
        
        if isinstance(source, LazyTable) and subdomain:
            table_new = LazyTable.from_domain(domain)
            table_new.stop_pulling = True # Should only be done by first LazyTable?
            table_new.table_origin = source
            # Fill the table with the rows that were already materialized.
            # TODO: Do something smarter here?
            #   Definitely, currently we need the copy.copy to prevent
            #   RuntimeError: dictionary changed size during iteration
            for row_index_full in copy.copy(table_new.table_origin.row_mapping):
                for variable in table_new.domain:
                    # The value itself is unused; the lookup is done for its
                    # effect on the new table (see the comment above).
                    # pylint: disable=unused-variable
                    value = table_new[row_index_full][variable]
        else:
            table_new = Table.from_table(
                domain=domain,
                source=source,
                row_indices=row_indices,
            )

        return table_new
 def retrieve(self, url):
     """Load a table from ``url`` while driving a ten-step progress bar.

     Returns the loaded table. Returns ``None`` when ``url`` is empty or
     when loading fails; a failure is logged and shown through
     ``self.error``. The progress bar is always finished once it has been
     created.
     """
     if not url:
         return
     progress = gui.ProgressBar(self, 10)
     for _ in range(3):
         progress.advance()
     try:
         try:
             table = Table.from_url(url)
         except Exception as e:
             import traceback
             log.error(traceback.format_exc())
             log.error("Couldn't load spreadsheet %s: %s", url, e)
             self.error("Couldn't load spreadsheet. Ensure correct read permissions; rectangular, top-left aligned sheet data ...")
             return
         # Loading succeeded: run the bar through its remaining steps.
         for _ in range(7):
             progress.advance()
         return table
     finally:
         progress.finish()
示例#39
0
    def apply_domain_edit(self):
        """Build a table from the edited domain and send it to the output.

        ``None`` is sent when there is no input data or when the edited
        domain has neither variables nor metas. Otherwise a new table is
        built from the columns returned by the domain editor, carrying over
        the weights, name, ids and attributes of the input data. The apply
        button is disabled afterwards in every case.
        """
        table = None
        source = self.data
        if source is not None:
            domain, cols = self.domain_editor.get_domain(source.domain, source)
            if domain.variables or domain.metas:
                X, y, m = cols
                table = Table.from_numpy(domain, X, y, m, source.W)
                table.name = source.name
                table.ids = np.array(source.ids)
                table.attributes = getattr(source, 'attributes', {})

        self.Outputs.data.send(table)
        self.apply_button.setEnabled(False)
示例#40
0
    def apply_domain_edit(self):
        """Rebuild the data table from the edited variable list and send it.

        Each entry of ``self.editor_model.variables`` is paired, in order,
        with the original variables of ``self.data.domain`` (attributes,
        then class variables, then metas). For every entry the column is
        read from the data, converted if its name or type was changed, and
        placed into the role chosen in the editor. The resulting table is
        sent on the "Data" channel and the apply button is disabled.
        """
        def is_missing(x):
            return str(x) in ("nan", "")

        attributes, class_vars, metas = [], [], []
        X, y, m = [], [], []
        # Both lists are indexed by the role picked in the editor
        # (0 = attribute, 1 = class variable, 2 = meta).
        places = [attributes, class_vars, metas]
        cols = [X, y, m]

        old_domain = self.data.domain
        originals = chain(
            [(var, 0) for var in old_domain.attributes],
            [(var, 1) for var in old_domain.class_vars],
            [(var, 2) for var in old_domain.metas])

        for (name, tpe, place, _vals, _is_con), (orig_var, orig_plc) in zip(
                self.editor_model.variables, originals):
            # Entries with place 3 are left out of the new table
            # (presumably the editor's "skip" role -- confirm).
            if place == 3:
                continue

            column = self.data[:, orig_var]
            source_rows = column.metas if orig_plc == 2 else column
            col_data = list(chain(*source_rows))

            if name == orig_var.name and tpe == type(orig_var):
                # Neither renamed nor retyped: reuse the original variable.
                var = orig_var
            elif tpe == DiscreteVariable:
                # The distinct non-missing values become the categories and
                # the column is re-encoded as indices into that list.
                values = [str(v) for v in set(col_data) if not is_missing(v)]
                var = tpe(name, values)
                col_data = [
                    np.nan if is_missing(v) else values.index(str(v))
                    for v in col_data
                ]
            elif tpe == StringVariable and type(orig_var) == DiscreteVariable:
                # Discrete -> string: replace indices by their text form.
                var = tpe(name)
                col_data = [
                    "" if np.isnan(v) else orig_var.repr_val(v)
                    for v in col_data
                ]
            else:
                var = tpe(name)

            places[place].append(var)
            cols[place].append(col_data)

        domain = Domain(attributes, class_vars, metas)

        # Columns were collected one list per variable; transpose so that
        # rows are instances.
        if X:
            X = np.array(X).T
        else:
            X = np.empty((len(self.data), 0))
        y = np.array(y).T if y else None
        has_strings = any(
            isinstance(meta, StringVariable) for meta in domain.metas)
        meta_dtype = object if has_strings else float
        m = np.array(m, dtype=meta_dtype).T if m else None

        table = Table.from_numpy(domain, X, y, m, self.data.W)
        self.send("Data", table)
        self.apply_button.setEnabled(False)
示例#41
0
    def insert_topics_into_corpus(self, corp_in):
        """
        Build a table whose attributes are the topical representation.

        The dense topic matrix of ``self.corpus`` supplies the attribute
        values (one continuous variable per entry of ``self.topic_names``);
        class variables, class values and metas are taken from ``corp_in``.

        :param corp_in: Corpus into which we want to insert topical
            representations
        :return: `Orange.data.table.Table`
        """
        topic_matrix = matutils.corpus2dense(
            self.corpus, num_terms=self.num_topics).T

        # One continuous attribute per topic; everything else is inherited
        # from the incoming corpus.
        topic_vars = [ContinuousVariable(name) for name in self.topic_names]
        source_domain = corp_in.domain
        new_domain = Domain(topic_vars,
                            source_domain.class_vars,
                            metas=source_domain.metas)

        return Table.from_numpy(
            new_domain, topic_matrix, Y=corp_in._Y, metas=corp_in.metas)
示例#42
0
    def get_all_topics_table(self):
        """Return a table with one row per word and one column per topic.

        Words of the first topic, sorted, are stored in the ``Word`` meta
        column; each topic column holds that topic's weights ordered by its
        own sorted words. The table is named 'All topics'.
        """
        topic_words = self._topics_words(self.n_words)
        topic_weights = self._topics_weights(self.n_words)
        vocabulary = sorted(topic_words[0])
        n_topics = len(topic_words)

        # For every topic, order its weights by the accompanying word; the
        # transpose then puts words in rows and topics in columns.
        rows = [
            [weight for _, weight in sorted(zip(words, weights))]
            for words, weights in zip(topic_words, topic_weights)
        ]
        X = np.array(rows).T

        # Use only the first n_topics names: the model may return fewer
        # topics than requested (e.g. 9 instead of 10) when the rank is
        # lower than the requested number of topics.
        attrs = [ContinuousVariable(name)
                 for name in self.topic_names[:n_topics]]

        domain = Domain(attrs, metas=[StringVariable('Word')])
        word_column = np.array(vocabulary)[:, None]
        t = Table.from_numpy(domain, X=X, metas=word_column)
        t.name = 'All topics'
        return t
示例#43
0
    def set_tree(self, model=None):
        """Handle a newly received tree model (or its removal).

        The widget is cleared first and ``model`` is stored. When a model is
        given, the tree type, data, tree adapter, colors, legend, info box,
        depth slider and target-class combo are refreshed, and optional
        ``meta_*`` attributes on the model are applied. In every case an
        annotated table built from ``self.instances`` is sent on
        ``ANNOTATED_DATA_SIGNAL_NAME`` at the end.

        :param model: the tree model to display, or ``None`` to reset
        """
        self.clear()
        self.model = model

        if model is not None:
            # We need to know what kind of tree we have in order to properly
            # show colors and tooltips
            if model.domain.class_var.is_discrete:
                self.tree_type = self.CLASSIFICATION
            elif model.domain.class_var.is_continuous:
                self.tree_type = self.REGRESSION
            else:
                self.tree_type = self.GENERAL

            self.instances = model.instances
            # this bit is important for the regression classifier
            # When the instances use a different domain than the model, they
            # are converted into the model's domain; otherwise they are used
            # as they are.
            if self.instances is not None and \
                    self.instances.domain != model.domain:
                self.clf_dataset = Table.from_table(
                    self.model.domain, self.instances)
            else:
                self.clf_dataset = self.instances

            # The adapter and palette are looked up per tree type.
            self.tree_adapter = self._get_tree_adapter(self.model)
            self.color_palette = self._tree_specific('_get_color_palette')()

            # Hand the new tree and its type-specific tooltip and node-color
            # callbacks to the tree view.
            self.ptree.clear()
            self.ptree.set_tree(self.tree_adapter)
            self.ptree.set_tooltip_func(self._tree_specific('_get_tooltip'))
            self.ptree.set_node_color_func(
                self._tree_specific('_get_node_color')
            )

            self._tree_specific('_update_legend_colors')()
            self._update_legend_visibility()

            self._update_info_box()
            self._update_depth_slider()

            self._tree_specific('_update_target_class_combo')()

            self._update_main_area()

            # Get meta variables describing pythagoras tree if given from
            # forest.
            if hasattr(model, 'meta_size_calc_idx'):
                self.size_calc_idx = model.meta_size_calc_idx
            if hasattr(model, 'meta_size_log_scale'):
                self.size_log_scale = model.meta_size_log_scale
            # Updating the size calc redraws the whole tree
            if hasattr(model, 'meta_size_calc_idx') or \
                    hasattr(model, 'meta_size_log_scale'):
                self.update_size_calc()
            # The target class can also be passed from the meta properties
            if hasattr(model, 'meta_target_class_index'):
                self.target_class_index = model.meta_target_class_index
                self.update_colors()
            # TODO this messes up the viewport in pythagoras tree viewer
            # it seems the viewport doesn't reset its size if this is applied
            # if hasattr(model, 'meta_depth_limit'):
            #     self.depth_limit = model.meta_depth_limit
            #     self.update_depth()
        # NOTE(review): self.instances is not reassigned in this method when
        # model is None; presumably self.clear() resets it -- confirm.
        self.send(ANNOTATED_DATA_SIGNAL_NAME,
                  create_annotated_table(self.instances, None))
示例#44
0
    def received_table_load_votable(self, private_key, sender_id, msg_id, mtype, parameters, extra):
        """
        Read the received VOTable, convert it to an Orange table and send it.

        The VOTable at ``parameters['url']`` is parsed; its first table is
        turned into an Orange table in which every column whose name
        contains 'CLASS' becomes a discrete class variable and every other
        column a continuous attribute. The result becomes ``self.data`` (or
        is appended to it when data is already present) and ``self.data``
        is sent on the "Data" channel.

        :param private_key: only printed here
        :param sender_id: only printed here
        :param msg_id: only printed here
        :param mtype: only printed here
        :param parameters: mapping that must contain the key ``'url'``
        :param extra: only printed here
        """
        print("Call:", private_key, sender_id, msg_id, mtype, parameters, extra)

        # Retrieve and read the VOTable.
        url_table = parameters['url']

        # sys.stdout is redirected by canvas.__main__ via redirect_stdout()
        # in canvas.util.redirect to an
        # Orange.canvas.application.outputview.TextStream object. This
        # has a @queued_blocking flush(), which can result in an "Result not
        # yet ready" RuntimeError from the QueuedCallEvent class.
        # This exception is raised because astropy.io.votable.table uses
        # astropy.utils.xml.iterparser, which uses astropy.utils.data,
        # which uses the Spinner class from astropy.utils.console, which
        # finally uses stdout to output a progress indicator.
        # Orange has its own mechanisms for indicating progress, so it would
        # perhaps be better to try to use that.
        # For now, the Orange redirect of stdout is temporarily disabled
        # while the votable is being parsed. The try/finally guarantees the
        # redirect is restored even when parsing fails.
        stdout_orange = sys.stdout
        sys.stdout = sys.__stdout__
        try:
            votable_tree = votable.parse(url_table)
        finally:
            sys.stdout = stdout_orange

        print("VOTable Tree created")
        # astropy.io.votable.tree.Table -> astropy.table.table.Table
        votable_table = votable_tree.get_first_table()
        table = votable_table.to_table()
        print("AstroPy table made")

        # Convert the VOTable columns to a Domain.
        # TODO: metas are not supported yet.
        # NOTE(review): the class values are hard-coded to 'alpha'/'beta'.
        attributes = [
            ContinuousVariable(name=column)
            for column in table.columns if 'CLASS' not in column
        ]
        class_vars = [
            DiscreteVariable(name=column, values=['alpha', 'beta'])
            for column in table.columns if 'CLASS' in column
        ]
        domain = Domain(
            attributes=attributes,
            class_vars=class_vars,
        )

        # TODO: Use from_domain() implicitly from __init__().
        # TODO: Include support to stop_pulling immediately.
        # TODO: set widget_origin?
        otable = Table.from_domain(
            domain=domain,
            n_rows=len(table),
        )
        otable.stop_pulling = True
        print("Orange Table initialized")
        for i, variable in enumerate(otable.domain.attributes):
            otable.X[:, i] = table.columns[variable.name].data
        # NOTE(review): every class column overwrites the whole of Y, so
        # this is only meaningful for a single class variable -- confirm.
        for variable in otable.domain.class_vars:
            otable.Y[:] = table.columns[variable.name].data.round()

        print("Orange Table filled")
        if self.data is None:
            self.data = otable
        else:
            # TODO: Why doesn't appending row by row work here?
            self.data.extend(otable)

        self.send("Data", self.data)
        print("Orange Table send A")
示例#45
0
    def open_file(self, fn):
        """Load the data file ``fn``, update the info labels and send it.

        Widget messages are cleared first. When ``fn`` does not exist but a
        file with the same base name exists in the current directory, that
        file is loaded instead. The special name "(none)" sends ``None`` and
        resets the labels. On a load error the entry is removed from the
        file combo (and from the recent paths when it matches the original
        name), the error is displayed and ``None`` is sent.

        :param fn: path or URL of the file to load, or "(none)"
        """
        self.error()
        self.warning()
        self.information()
        fn_original = fn
        if not os.path.exists(fn):
            basename = os.path.basename(fn)
            if os.path.exists(os.path.join(".", basename)):
                fn = os.path.join(".", basename)
                self.information("Loading '{}' from the current directory."
                                 .format(basename))
        if fn == "(none)":
            self.send("Data", None)
            self.infoa.setText("No data loaded")
            self.infob.setText("")
            self.warnings.setText("")
            return

        self.loaded_file = ""

        data = None
        err_value = None
        try:
            # TODO handle self.new_variables
            data = Table(fn)
            self.loaded_file = fn
        except Exception as exc:
            err_value = str(exc)
            # Errors of this kind are retried once and shown as a warning.
            if "is being loaded as" in err_value:
                try:
                    data = Table(fn)
                    self.loaded_file = fn
                    self.warning(0, err_value)
                except Exception:
                    # Only ordinary errors are absorbed here, so that
                    # KeyboardInterrupt/SystemExit still propagate.
                    data = None
        # NOTE(review): err_value stays set even when the retry above
        # succeeded, so the error handling below also runs for data that
        # did load -- confirm whether that is intended.
        if err_value is not None:
            if fn.startswith("http"):
                err_value = "File '{}' does not contain valid data".format(
                    os.path.basename(fn)
                )
            # Drop the failing entry from the combo but keep its text
            # visible in the line edit.
            ind = self.file_combo.currentIndex()
            text = self.file_combo.currentText()
            self.file_combo.removeItem(ind)
            self.file_combo.lineEdit().setText(text)
            if ind < len(self.recent_paths) and \
                    self.recent_paths[ind].abspath == fn_original:
                del self.recent_paths[ind]
            self.error(err_value)
            self.infoa.setText('Data was not loaded due to an error.')
            self.infob.setText('Error:')
            self.warnings.setText(err_value)

        if data is None:
            self.dataReport = None
        else:
            domain = data.domain
            self.infoa.setText(
                "{} instance(s), {} feature(s), {} meta attribute(s)"
                .format(len(data), len(domain.attributes), len(domain.metas)))
            if domain.has_continuous_class:
                self.infob.setText("Regression; numerical class.")
            elif domain.has_discrete_class:
                self.infob.setText("Classification; " +
                                   "discrete class with {} values."
                                   .format(len(domain.class_var.values)))
            elif data.domain.class_vars:
                self.infob.setText("Multi-target; {} target variables."
                                   .format(len(data.domain.class_vars)))
            else:
                self.infob.setText("Data has no target variable.")
            self.warnings.setText("")

            add_origin(data, fn)
            # Name the data after the file, without its last extension.
            file_name = os.path.split(fn)[1]
            if "." in file_name:
                data.name = file_name[:file_name.rfind('.')]
            else:
                data.name = file_name

            self.dataReport = self.prepareDataReport(data)
        self.send("Data", data)
示例#46
0
    def open_file(self, fn):
        """Load the data file ``fn``, update the info labels and send it.

        Widget messages are cleared first. When ``fn`` does not exist but a
        file with the same base name exists in the current directory, that
        file is loaded instead. The special name "(none)" sends ``None`` and
        resets the labels. Any load failure is reported through
        ``self.error`` and the info labels, and ``None`` is sent.

        :param fn: path of the file to load, or "(none)"
        """
        self.error()
        self.warning()
        self.information()

        if not os.path.exists(fn):
            basename = os.path.basename(fn)
            if os.path.exists(os.path.join(".", basename)):
                fn = os.path.join(".", basename)
                self.information("Loading '{}' from the current directory."
                                 .format(basename))
        if fn == "(none)":
            self.send("Data", None)
            self.infoa.setText("No data loaded")
            self.infob.setText("")
            self.warnings.setText("")
            return

        self.loaded_file = ""

        data = None
        load_error = None
        try:
            # TODO handle self.new_variables
            data = Table(fn)
            self.loaded_file = fn
        except Exception as exc:
            # Keep the message as text: the exception name is unbound when
            # the handler ends, and the labels below expect a string.
            load_error = str(exc)
            # Errors of this kind are retried once; if the retry succeeds
            # the message is only shown as a warning.
            if "is being loaded as" in load_error:
                try:
                    data = Table(fn)
                except Exception:
                    # Retry failed: fall through to the error report below.
                    pass
                else:
                    self.loaded_file = fn
                    self.warning(0, load_error)
                    load_error = None

        # Report every failure, not only a failed retry, so that a file
        # that could not be loaded never fails silently.
        if load_error is not None:
            self.error(load_error)
            self.infoa.setText('Data was not loaded due to an error.')
            self.infob.setText('Error:')
            self.warnings.setText(load_error)

        if data is None:
            self.dataReport = None
        else:
            domain = data.domain
            self.infoa.setText(
                '{} instance(s), {} feature(s), {} meta attributes'
                .format(len(data), len(domain.attributes), len(domain.metas)))
            if isinstance(domain.class_var, ContinuousVariable):
                self.infob.setText('Regression; Numerical class.')
            elif isinstance(domain.class_var, DiscreteVariable):
                self.infob.setText('Classification; Discrete class with {} values.'
                                   .format(len(domain.class_var.values)))
            elif data.domain.class_vars:
                self.infob.setText('Multi-target; {} target variables.'
                                   .format(len(data.domain.class_vars)))
            else:
                self.infob.setText("Data has no target variable.")

            addOrigin(data, fn)
            # Name the data after the file, without its last extension.
            fName = os.path.split(fn)[1]
            if "." in fName:
                data.name = fName[:fName.rfind('.')]
            else:
                data.name = fName

            self.dataReport = self.prepareDataReport(data)
        self.send("Data", data)