def apply(self):
    """Build the new class variable from the active rules and send the data.

    Sends None when no attribute is selected. Otherwise the original
    class variables are moved to the metas and a discrete variable,
    computed by the transformer for the attribute's type, becomes the class.
    """
    if not self.attribute:
        self.send("Data", None)
        return

    source_domain = self.data.domain
    rule_list = self.active_rules
    # Transposition + stripping
    # A rule is kept when its label, its pattern or its match count is truthy.
    keep_flags = [
        label or pattern or n_matches
        for (label, pattern), n_matches in zip(rule_list, self.match_counts)
    ]
    kept_patterns = [
        pattern
        for (_, pattern), keep in zip(rule_list, keep_flags)
        if keep
    ]
    kept_names = [
        name
        for name, keep in zip(self.class_labels(), keep_flags)
        if keep
    ]

    make_transformer = self.TRANSFORMERS[type(self.attribute)]
    compute_value = make_transformer(
        self.attribute, kept_patterns,
        self.case_sensitive, self.match_beginning)
    class_var = DiscreteVariable(
        self.class_name, kept_names, compute_value=compute_value)
    target_domain = Domain(
        source_domain.attributes,
        class_var,
        source_domain.metas + source_domain.class_vars)
    self.send("Data", Table(target_domain, self.data))
def apply_domain_edit(self):
    """Apply the edited domain to the loaded data and send the result.

    Clears the performance and renamed-variable warnings, rebuilds the
    table from the domain editor's output when the domain changed, updates
    the output summary, sends the table (or None) on the data output and
    disables the apply button.
    """
    self.Warning.performance_warning.clear()
    self.Warning.renamed_vars.clear()
    if self.data is None:
        table = None
    else:
        domain, cols, renamed = \
            self.domain_editor.get_domain(self.data.domain, self.data, deduplicate=True)
        if not (domain.variables or domain.metas):
            # Nothing is left in the edited domain: output no data.
            table = None
        elif domain is self.data.domain:
            # The editor returned the very same domain object: reuse the data.
            table = self.data
        else:
            X, y, m = cols
            table = Table.from_numpy(domain, X, y, m, self.data.W)
            # Carry the name, row ids and attributes over from the source.
            table.name = self.data.name
            table.ids = np.array(self.data.ids)
            table.attributes = getattr(self.data, 'attributes', {})
            self._inspect_discrete_variables(domain)
        if renamed:
            self.Warning.renamed_vars(f"Renamed: {', '.join(renamed)}")
    # NOTE(review): `if table` relies on the table's truthiness, so None and
    # presumably also an empty table report "no output" -- confirm.
    summary = len(table) if table else self.info.NoOutput
    details = format_summary_details(table) if table else ""
    self.info.set_output_summary(summary, details)
    self.Outputs.data.send(table)
    self.apply_button.setEnabled(False)
def apply_domain_edit(self):
    """Rebuild the table from the edited domain and send it on the data output.

    Sends None when no data is loaded or when the edited domain has neither
    variables nor metas. The apply button is disabled afterwards in every
    case.
    """
    if self.data is None:
        table = None
    else:
        domain, cols = self.domain_editor.get_domain(
            self.data.domain, self.data)
        if not (domain.variables or domain.metas):
            table = None
        else:
            # X holds the input columns (domain.attributes), y the output
            # (class variable) columns and m the meta columns.
            X, y, m = cols
            table = Table.from_numpy(domain, X, y, m, self.data.W)
            # Carry over from self.data: 1) the name, 2) the row ids and
            # 3) the attributes dict (an empty dict when the source has none).
            table.name = self.data.name
            table.ids = np.array(self.data.ids)
            table.attributes = getattr(self.data, 'attributes', {})
    # Publish the table on the data output.
    self.Outputs.data.send(table)
    self.apply_button.setEnabled(False)
def load_url(self, from_reload=False):
    """Load the table behind the URL currently shown in the combo box.

    On failure the error is logged and shown and ``self.table`` is reset to
    None. The result is committed unless this is a reload that produced a
    table with the same checksum as the previous one.
    """
    def _equal(data1, data2):
        # NaN never equals itself, so a checksum that cannot be computed
        # always counts as "different".
        NAN = float('nan')
        first = try_(lambda: data1.checksum(), NAN)
        second = try_(lambda: data2.checksum(), NAN)
        return first == second

    url = self.combo.currentText()
    if not url:
        return

    previous = self.table
    try:
        with self.progressBar(3) as progress:
            progress.advance()
            loaded = Table.from_url(url)
            progress.advance()
    except Exception as e:
        log.exception("Couldn't load data from: %s", url)
        self.Error.error(try_(lambda: e.args[0], ''))
        self.table = None
    else:
        self.Error.clear()
        self.table = loaded
        self.combo.setTitleFor(self.combo.currentIndex(), loaded.name)
    self.set_info()

    unchanged = from_reload and _equal(previous, self.table)
    if not unchanged:
        self.commit()
def setUp(self) -> None:
    """Create the widget, send it the toy table and check the combo sizes."""
    test_data_path = os.path.join(os.path.dirname(__file__), 'datasets')
    self.widget = self.create_widget(OWAsSurvivalData)
    data_input = self.widget.Inputs.data
    toy_table = Table(f'{test_data_path}/toy_example.tab')
    self.send_signal(data_input, toy_table)
    # One candidate is expected for the time combo, two for the event combo.
    self.assertEqual(self.widget.controls.time_var.count(), 1)
    self.assertEqual(self.widget.controls.event_var.count(), 2)
def test_report_widgets_visualize(self):
    """Create a report for each of the ten visualization widgets on zoo."""
    application = QApplication(sys.argv)
    report = OWReport.get_instance()
    zoo = Table("zoo")
    visu_widgets = self.visu_widgets
    self.assertEqual(len(visu_widgets), 10)
    self._create_report(visu_widgets, report, zoo, application)
def __init__(self, parent=None, signalManager=None, settings=None):
    """Set up the undo stack, the plot, the class-label model and the data.

    The arguments are forwarded unchanged to the base-class constructor.
    """
    super().__init__(parent, signalManager, settings)
    # Placeholder until the table is built below; the plot is created with
    # `widget=self` before that happens.
    self.data = None
    self.undoStack = QtGui.QUndoStack(self)
    self.plot = PaintDataPlot(self.mainArea, "Painted Plot", widget=self)
    # Selectable, enabled and editable list of class labels, starting with
    # two default entries.
    self.classValuesModel = ColoredListModel(
        ["Class-1", "Class-2"], self, self.plot,
        flags=QtCore.Qt.ItemIsSelectable | QtCore.Qt.ItemIsEnabled |
        QtCore.Qt.ItemIsEditable)
    self.classValuesModel.dataChanged.connect(self.classNameChange)
    # Table built from the domain alone: two continuous attributes named
    # after attr1/attr2 and a discrete class whose values are the model.
    self.data = Table(
        Domain([
            ContinuousVariable(self.attr1),
            ContinuousVariable(self.attr2)
        ], DiscreteVariable("Class", values=self.classValuesModel)))
    self.toolsStackCache = {}
    self.initUI()
    self.initPlot()
    self.updatePlot()
def test_report_widgets_unsupervised_dist(self):
    """Create a report for both distance widgets from zoo's Euclidean distances."""
    report = OWReport.get_instance()
    zoo = Table("zoo")
    distances = Euclidean(zoo)
    dist_widgets = self.dist_widgets
    self.assertEqual(len(dist_widgets), 2)
    self._create_report(dist_widgets, report, distances)
def apply_domain_edit(self):
    """Apply the edited domain, warn about multiple targets and send the data.

    The output is None when there is no data or when the edited domain is
    empty; the input table itself when the domain object is unchanged;
    otherwise a new table built from the editor's columns.
    """
    self.Warning.performance_warning.clear()
    self.Warning.renamed_vars.clear()

    table = None
    if self.data is not None:
        source = self.data
        domain, cols, renamed = self.domain_editor.get_domain(
            source.domain, source, deduplicate=True)
        if domain.variables or domain.metas:
            if domain is self.data.domain:
                table = self.data
            else:
                attr_cols, class_cols, meta_cols = cols
                table = Table.from_numpy(
                    domain, attr_cols, class_cols, meta_cols, self.data.W)
                # Carry the name, row ids and attributes over from the source.
                table.name = self.data.name
                table.ids = np.array(self.data.ids)
                table.attributes = getattr(self.data, 'attributes', {})
                self._inspect_discrete_variables(domain)
        if renamed:
            self.Warning.renamed_vars(f"Renamed: {', '.join(renamed)}")

    has_multiple_targets = (
        table is not None and len(table.domain.class_vars) > 1)
    self.Warning.multiple_targets(shown=has_multiple_targets)
    self.Outputs.data.send(table)
    self.apply_button.setEnabled(False)
def test_incorrect_input_data(self):
    """The raw toy table shows the missing-survival-data error.

    The widget must also leave data, time_var and event_var unset.
    """
    widget = self.widget
    raw_table = Table(f'{self.test_data_path}/toy_example.tab')
    self.send_signal(widget.Inputs.data, raw_table)
    self.assertTrue(widget.Error.missing_survival_data.is_shown())
    self.assertIsNone(widget.data)
    self.assertIsNone(widget.time_var)
    self.assertIsNone(widget.event_var)
def setUp(self) -> None: self.test_data_path = os.path.join(os.path.dirname(__file__), 'datasets') # create widgets self.as_survival = self.create_widget(OWAsSurvivalData) self.widget = self.create_widget(OWKaplanMeier) # handle survival data self.send_signal(self.as_survival.Inputs.data, Table(f'{self.test_data_path}/toy_example.tab')) simulate.combobox_activate_item( self.as_survival.controls.time_var, self.as_survival._data.columns.Time.name) simulate.combobox_activate_item( self.as_survival.controls.event_var, self.as_survival._data.columns.Event.name) self.send_signal(self.widget.Inputs.data, self.get_output(self.as_survival.Outputs.data)) # check survival data time_var, event_var = get_survival_endpoints(self.widget.data.domain) self.assertEqual(time_var.name, 'Time') self.assertEqual(event_var.name, 'Event') self.assertIn(time_var, self.widget.data.domain.class_vars) self.assertIn(event_var, self.widget.data.domain.class_vars) # check if missing data detected self.assertTrue(self.widget.Warning.missing_values_detected.is_shown()) self.widget.auto_commit = True # If we don't do this function ViewBox.mapSceneToView fails with num py.linalg.LinAlgError: Singular matrix vb = self.widget.graph.getViewBox() vb.resize(200, 200)
def test_main():
    """Show the data sampler fed with iris and run the Qt event loop."""
    application = QtGui.QApplication([])
    iris = Table("iris")
    sampler = OWDataSampler()
    sampler.set_data(iris)
    sampler.show()
    exit_code = application.exec_()
    return exit_code
def get_all_topics_table(self):
    """Return the topics as a table with one row per topic.

    The attributes are the alphabetically sorted words of the first topic;
    the metas hold the topic name and the marginal topic probability.
    """
    topic_words = self._topics_words(self.n_words)
    topic_weights = self._topics_weights(self.n_words)
    vocabulary = sorted(topic_words[0])
    n_topics = len(topic_words)

    # Within each topic, order the weights alphabetically by their word.
    X = np.array([
        [weight for _, weight in sorted(zip(words, weights))]
        for words, weights in zip(topic_words, topic_weights)
    ])

    # take only first n_topics; e.g. when user requested 10, but gensim
    # returns only 9 — when the rank is lower than num_topics requested
    topic_column = np.array(self.topic_names[:n_topics], dtype=object)[:, None]

    word_vars = [ContinuousVariable(word) for word in vocabulary]
    meta_vars = [
        StringVariable('Topics'),
        ContinuousVariable('Marginal Topic Probability'),
    ]
    marginal = np.array(
        self._marginal_probability(self.tokens, self.doc_topic),
        dtype=object)

    topics_table = Table.from_numpy(
        Domain(word_vars, metas=meta_vars),
        X=X,
        metas=np.hstack((topic_column, marginal)))
    topics_table.name = 'All topics'
    return topics_table
def commit(self):
    """Send the continuized data, or None when there is no input data."""
    continuizer = self.constructContinuizer()
    if self.data is None:
        self.send("Data", None)
        return
    continuized_domain = continuizer(self.data)
    self.send("Data", Table.from_table(continuized_domain, self.data))
def test_main():
    """Show the data table widget with three data sets and run the event loop."""
    application = QtGui.QApplication(sys.argv)
    viewer = OWDataTable()
    # Load all data sets before the widget is shown.
    datasets = [Table(name) for name in ("iris", "brown-selected", "housing")]
    viewer.show()
    viewer.raise_()
    for dataset in datasets:
        viewer.set_dataset(dataset, dataset.name)
    return application.exec()
def sendData(self):
    """Send the data, dropping the class variable if it takes a single value."""
    table = self.data
    distinct_classes = {str(row.get_class()) for row in table}
    if len(distinct_classes) == 1:
        # Remove the useless class variable.
        attributes_only = Domain(table.domain.attributes)
        table = Table(attributes_only, table)
    self.send("Data", table)
def set_tree(self, model=None):
    """When a different tree is given.

    Clears the widget and stores `model`. When a model is given, the tree
    type, instances, tree adapter, palette, tree view, legend, info box,
    depth slider and target-class combo are refreshed, and any `meta_*`
    attributes present on the model are applied afterwards.
    """
    self.clear()
    self.model = model

    if model is not None:
        # We need to know what kind of tree we have in order to properly
        # show colors and tooltips
        if isinstance(model, TreeClassifier):
            self.tree_type = self.CLASSIFICATION
        elif isinstance(model, TreeRegressor):
            self.tree_type = self.REGRESSION
        else:
            self.tree_type = self.GENERAL

        self.instances = model.instances
        # this bit is important for the regression classifier: when the
        # instances use another domain than the model, convert them
        if self.instances is not None and \
                self.instances.domain != model.domain:
            self.clf_dataset = Table.from_table(
                self.model.domain, self.instances)
        else:
            self.clf_dataset = self.instances

        self.tree_adapter = self._get_tree_adapter(self.model)
        # `_tree_specific` returns a callable looked up by name; it is
        # called immediately here.
        self.color_palette = self._tree_specific('_get_color_palette')()

        self.ptree.clear()
        self.ptree.set_tree(self.tree_adapter)
        self.ptree.set_tooltip_func(self._tree_specific('_get_tooltip'))
        self.ptree.set_node_color_func(
            self._tree_specific('_get_node_color')
        )

        self._tree_specific('_update_legend_colors')()
        self._update_legend_visibility()

        self._update_info_box()
        self._update_depth_slider()

        self._tree_specific('_update_target_class_combo')()

        self._update_main_area()

        # Get meta variables describing pythagoras tree if given from
        # forest.
        if hasattr(model, 'meta_size_calc_idx'):
            self.size_calc_idx = model.meta_size_calc_idx
        if hasattr(model, 'meta_size_log_scale'):
            self.size_log_scale = model.meta_size_log_scale
        # Updating the size calc redraws the whole tree
        if hasattr(model, 'meta_size_calc_idx') or \
                hasattr(model, 'meta_size_log_scale'):
            self.update_size_calc()
        # The target class can also be passed from the meta properties
        if hasattr(model, 'meta_target_class_index'):
            self.target_class_index = model.meta_target_class_index
            self.update_colors()
def sendData(self):
    """Send the continuized data, or None when there is no input data.

    The continuizer is built first in both cases. ``self.data_changed`` is
    reset to False once the signal has been sent.
    """
    continuizer = self.constructContinuizer()
    if self.data is not None:
        domain = continuizer(self.data)
        data = Table.from_table(domain, self.data)
        self.send("Data", data)
    else:
        # Emit the empty signal through `send`. Calling `sendData` here
        # would recurse with arguments this method does not accept.
        self.send("Data", None)
    self.data_changed = False
def test_report_widgets_visualize(self):
    """Create a report for each visualization widget on the zoo data set.

    OrangeDeprecationWarning is ignored for the duration of the test. The
    ``with`` block restores the warning filters even when creating the
    report raises.
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", OrangeDeprecationWarning)
        rep = OWReport.get_instance()
        data = Table("zoo")
        widgets = self.visu_widgets
        self._create_report(widgets, rep, data)
def test_z_score(self):
    """ZScore normalizes a 2x2 table along axis 0 and along axis 1."""
    domain = Domain([ContinuousVariable('A'), ContinuousVariable('B')])
    table = Table.from_list(domain, [[1, 2], [3, 4]])
    expected_by_axis = (
        (0, [[-1, -1], [1, 1]]),
        (1, [[-1, 1], [-1, 1]]),
    )
    for axis, expected in expected_by_axis:
        normalized = ZScore(axis=axis)(table)
        np.testing.assert_array_almost_equal(expected, normalized.X)
def test_quantile_normalization(self):
    """QuantileNormalization gives the expected values on a 3x4 table."""
    variables = [ContinuousVariable(name) for name in ('A', 'B', 'C', 'D')]
    domain = Domain(variables)
    rows = [
        [5, 2, 3, 4],
        [4, 1, 4, 2],
        [3, 4, 6, 8],
    ]
    table = Table.from_list(domain, rows)

    normalized = QuantileNormalization()(table)

    # expected result
    expected = np.array([
        [5.66666667, 2, 3, 4.66666667],
        [5.166667, 2, 5.166667, 3],
        [2, 3, 4.66666667, 5.66666667],
    ])
    np.testing.assert_array_almost_equal(expected, normalized.X)
def redo(self):
    """Append the new class label and rebuild the widget's data around it."""
    self.classValuesModel.append(self.newClassLabel)

    attributes = [ContinuousVariable(self.widget.attr1),
                  ContinuousVariable(self.widget.attr2)]
    class_var = DiscreteVariable("Class", values=self.classValuesModel)
    newdomain = Domain(attributes, class_var)
    newdata = Table(newdomain)

    # Re-create every existing example in the new domain.
    instances = []
    for ex in self.data:
        values = [float(ex[a]) for a in ex.domain.attributes]
        values.append(str(ex.get_class()))
        instances.append(Instance(newdomain, values))
    newdata.extend(instances)
    self.widget.data = newdata

    n_labels = len(self.classValuesModel)
    self.widget.removeClassLabel.setEnabled(n_labels > 1)
    # Select the label that was just appended (the last row of the model).
    newindex = self.classValuesModel.index(len(self.classValuesModel) - 1)
    selection = self.widget.classValuesView.selectionModel()
    selection.select(newindex, QtGui.QItemSelectionModel.ClearAndSelect)

    self.widget.updatePlot()
    self.widget.updateCursor()
def upload(filename):
    """Upload the remote data set `filename` together with its annotations.

    Reads ``<filename>.info`` (JSON) from URL_REMOTE to build the
    annotations, loads the table from URL_REMOTE, saves it to a local file,
    runs the ``data-table-upload`` process on that file and stores the
    annotations as the data set's descriptor. The local file is removed
    afterwards, also when the upload fails.

    :param filename: name of the data file below URL_REMOTE; a ``.tab.gz``
        name is saved locally with ``.tab.gz`` replaced by ``.pickle``
    """
    # URLs always use forward slashes, whatever the local OS separator is.
    import posixpath

    annotations = {'tabular': {}, 'other': {}}

    info_url = posixpath.join(URL_REMOTE, filename + '.info')
    with urllib.request.urlopen(info_url) as response:
        info = json.loads(response.read().decode())

    tabular = annotations['tabular']
    tabular['title'] = info['title']
    tabular['cells'] = info['instances']
    tabular['genes'] = info['num_of_genes']
    tabular['tax_id'] = info['taxid']
    tabular['target'] = info['target'] if info['target'] else ''
    tabular['tags'] = ', '.join(info['tags'])

    other = annotations['other']
    other['description'] = info['description']
    other['references'] = ' | '.join(info['references'])
    other['source'] = info['source']
    other['collection'] = info['collection']
    other['year'] = info['year']
    other['instances'] = info['instances']
    other['variables'] = info['variables']

    table = Table(posixpath.join(URL_REMOTE, filename))

    if '.tab.gz' in filename:
        filename = filename.replace('.tab.gz', '.pickle')

    try:
        table.save(filename)
        dataset = res.run('data-table-upload', input={'src': filename})

        tabular['file_name'] = filename
        tabular['file_size'] = os.stat(filename).st_size

        # descriptor schema slug
        dataset.descriptor_schema = 'data_info'
        dataset.descriptor = annotations
        dataset.save()
    finally:
        # cleanup: never leave the temporary local copy behind
        if os.path.exists(filename):
            os.remove(filename)
def redo(self):
    """Remove the class label at `self.index` along with its examples."""
    self.label = self.classValuesModel.pop(self.index)
    survivors = [ex for ex in self.data
                 if str(ex.get_class()) != self.label]

    attributes = [ContinuousVariable(self.widget.attr1),
                  ContinuousVariable(self.widget.attr2)]
    newdomain = Domain(
        attributes,
        DiscreteVariable("Class", values=self.classValuesModel))
    newdata = Table(newdomain)

    for ex in survivors:
        # Skip examples of the removed label and of any unknown label.
        if str(ex.get_class()) == self.label:
            continue
        if str(ex.get_class()) not in self.classValuesModel:
            continue
        values = [float(ex[a]) for a in ex.domain.attributes]
        values.append(str(ex.get_class()))
        newdata.append(Instance(newdomain, values))

    self.widget.data = newdata
    self.widget.updatePlot()
    self.widget.updateCursor()
def redo(self):
    """Rebuild the widget's data after a class label was renamed.

    Examples whose class label is no longer in the class-values model get
    ``self.changedLabel``; their previous label is remembered in
    ``self.oldLabelName``. All other examples keep their label.
    """
    newdomain = Domain([ContinuousVariable(self.widget.attr1),
                        ContinuousVariable(self.widget.attr2)],
                       DiscreteVariable("Class", values=self.classValuesModel))
    newdata = Table(newdomain)
    for ex in self.data:
        label = str(ex.get_class())
        if label not in self.classValuesModel:
            # This is the label that was renamed: remember it and swap it.
            self.oldLabelName = label
            label = self.changedLabel
        newdata.append(
            Instance(newdomain,
                     [float(ex[a]) for a in ex.domain.attributes] + [label]))
    self.widget.data = newdata
    self.widget.updatePlot()
def redo(self):
    """Pop the class label at `self.index` and drop the examples that used it."""
    self.label = self.classValuesModel.pop(self.index)

    def _still_valid(ex):
        # Keep an example only if its label is not the removed one and is
        # still present in the class-values model.
        return (str(ex.get_class()) != self.label
                and str(ex.get_class()) in self.classValuesModel)

    examples = [ex for ex in self.data if str(ex.get_class()) != self.label]
    newdomain = Domain(
        [
            ContinuousVariable(self.widget.attr1),
            ContinuousVariable(self.widget.attr2),
        ],
        DiscreteVariable("Class", values=self.classValuesModel),
    )
    newdata = Table(newdomain)
    for ex in filter(_still_valid, examples):
        row = [float(ex[a]) for a in ex.domain.attributes]
        row += [str(ex.get_class())]
        newdata.append(Instance(newdomain, row))

    self.widget.data = newdata
    self.widget.updatePlot()
    self.widget.updateCursor()
def main():  # pragma: no cover
    """Show the widget with the zoo data set for manual inspection."""
    import sys
    from AnyQt.QtWidgets import QApplication

    application = QApplication(sys.argv)
    zoo = Table("zoo")
    widget = OWCreateClass()
    widget.show()
    widget.set_data(zoo)
    application.exec()
    # Store the settings once the event loop has finished.
    widget.saveSettings()
def set_tree(self, model=None):
    """When a different tree is given.

    Clears the widget and stores `model`. When a model is given, the
    instances, tree adapter, tree view, palette, legend, info box and
    target-class combo are refreshed, and any `meta_*` attributes present
    on the model are applied. The annotated-data signal is sent at the end.
    """
    self.clear()
    self.model = model

    if model is not None:
        self.instances = model.instances
        # this bit is important for the regression classifier: when the
        # instances use another domain than the model, convert them
        if self.instances is not None and \
                self.instances.domain != model.domain:
            self.clf_dataset = Table.from_table(self.model.domain, self.instances)
        else:
            self.clf_dataset = self.instances

        self.tree_adapter = self._get_tree_adapter(self.model)
        self.ptree.clear()

        self.ptree.set_tree(
            self.tree_adapter,
            weight_adjustment=self.SIZE_CALCULATION[self.size_calc_idx][1],
            target_class_index=self.target_class_index,
        )

        self._update_depth_slider()
        # The palette is taken from the root of the tree that was just set.
        self.color_palette = self.ptree.root.color_palette
        self._update_legend_colors()
        self._update_legend_visibility()

        self._update_info_box()
        self._update_target_class_combo()

        self._update_main_area()

        # The target class can also be passed from the meta properties
        # This must be set after `_update_target_class_combo`
        if hasattr(model, 'meta_target_class_index'):
            self.target_class_index = model.meta_target_class_index
            self.update_colors()

        # Get meta variables describing what the settings should look like
        # if the tree is passed from the Pythagorean forest widget.
        if hasattr(model, 'meta_size_calc_idx'):
            self.size_calc_idx = model.meta_size_calc_idx
            self.update_size_calc()

        # TODO There is still something wrong with this
        # if hasattr(model, 'meta_depth_limit'):
        #     self.depth_limit = model.meta_depth_limit
        #     self.update_depth()

    # NOTE(review): assumed to run for a None model as well -- confirm
    # against the original indentation.
    self.send(ANNOTATED_DATA_SIGNAL_NAME, create_annotated_table(self.instances, None))
def redo(self):
    """Add the new class label and move the existing data to the new domain."""
    widget = self.widget
    self.classValuesModel.append(self.newClassLabel)

    newdomain = Domain(
        [ContinuousVariable(widget.attr1), ContinuousVariable(widget.attr2)],
        DiscreteVariable("Class", values=self.classValuesModel))

    def _in_new_domain(ex):
        # Same attribute values and class label, expressed in `newdomain`.
        coordinates = [float(ex[a]) for a in ex.domain.attributes]
        return Instance(newdomain, coordinates + [str(ex.get_class())])

    newdata = Table(newdomain)
    newdata.extend([_in_new_domain(ex) for ex in self.data])
    widget.data = newdata

    widget.removeClassLabel.setEnabled(len(self.classValuesModel) > 1)
    # Select the freshly added label, i.e. the last row of the model.
    last_row = len(self.classValuesModel) - 1
    newindex = self.classValuesModel.index(last_row)
    widget.classValuesView.selectionModel().select(
        newindex, QtGui.QItemSelectionModel.ClearAndSelect)

    widget.updatePlot()
    widget.updateCursor()
def test_report_widgets_regression(self):
    """Report a depth-3 regression tree on housing, then the regression widgets."""
    report = OWReport.get_instance()
    housing = Table("housing")
    regr_widgets = self.regr_widgets

    tree_widget = self.create_widget(OWTreeGraph)
    model = RegressionTreeLearner(max_depth=3)(housing)
    model.instances = housing

    tree_widget.ctree(model)
    tree_widget.create_report_html()
    report.make_report(tree_widget)

    self._create_report(regr_widgets, report, housing)
def test_report_widgets_model(self):
    """Report a depth-3 tree on titanic, then the model widgets."""
    report = OWReport.get_instance()
    titanic = Table("titanic")
    model_widgets = self.model_widgets

    tree_widget = self.create_widget(OWTreeGraph)
    classifier = TreeLearner(max_depth=3)(titanic)
    classifier.instances = titanic

    tree_widget.ctree(classifier)
    tree_widget.create_report_html()
    report.make_report(tree_widget)

    self._create_report(model_widgets, report, titanic)
def commit(self):
    """Embed the SMILES column and send the embedded and the skipped rows.

    Nothing is sent while the embedder or the SMILES attribute is unset.
    Each output receives None when it would otherwise be empty.
    """
    if self._embedder == '' or self._smiles_attr == '':
        return

    source = self.data
    smiles = source[:, self._smiles_attr].metas.flatten()
    embedded, valid = self.to_fingerprints(smiles, self._embedder)
    # Row indices that `to_fingerprints` did not report as valid.
    invalid = list(set(range(len(smiles))) - set(valid))

    if valid == []:
        self.Outputs.embedded_smiles.send(None)
    else:
        features = [ContinuousVariable.make("C_{}".format(x))
                    for x in range(embedded.shape[1])]
        if self._embedder == MACCS:
            # MACCS columns are named after the key names, plus a final '?'.
            features = [ContinuousVariable.make(name)
                        for name, _ in Chem.MACCSkeys.smartsPatts.values()]
            features.append(ContinuousVariable.make('?'))
        embedded_domain = Domain(
            features, self.data.domain.class_vars, self.data.domain.metas)
        embedded_table = Table.from_numpy(
            embedded_domain,
            embedded,
            self.data.Y[valid],
            self.data.metas[valid],
            self.data.W[valid],
        )
        self.Outputs.embedded_smiles.send(embedded_table)

    if invalid == []:
        self.Outputs.skipped_smiles.send(None)
    else:
        skipped_table = Table.from_numpy(
            self.data.domain,
            self.data.X[invalid],
            self.data.Y[invalid],
            self.data.metas[invalid],
            self.data.W[invalid],
        )
        self.Outputs.skipped_smiles.send(skipped_table)
def apply_domain_edit(self):
    """Build a new table from the edited variable list and send it.

    Every row of ``self.editor_model.variables`` is paired, in order, with
    the original attributes, class variables and metas of ``self.data``.
    Each kept column goes to the attributes, class variables or metas
    according to its `place`, and is converted when its type was changed.
    """
    attributes = []
    class_vars = []
    metas = []
    # Indexed by `place`: 0 = attributes, 1 = class variables, 2 = metas.
    places = [attributes, class_vars, metas]
    X, y, m = [], [], []
    cols = [X, y, m]  # Xcols, Ycols, Mcols

    def is_missing(x):
        # A value is missing when it prints as "nan" or as an empty string.
        return str(x) in ("nan", "")

    for column, (name, tpe, place, vals, is_con), (orig_var, orig_plc) in \
        zip(count(), self.editor_model.variables,
            chain([(at, 0) for at in self.data.domain.attributes],
                  [(cl, 1) for cl in self.data.domain.class_vars],
                  [(mt, 2) for mt in self.data.domain.metas])):
        # place 3 has no target list; presumably it means "skip this
        # variable" -- TODO confirm against the editor model.
        if place == 3:
            continue
        # Flatten the single-column selection into a plain list; the values
        # of original metas are read from `.metas`.
        if orig_plc == 2:
            col_data = list(chain(*self.data[:, orig_var].metas))
        else:
            col_data = list(chain(*self.data[:, orig_var]))
        if name == orig_var.name and tpe == type(orig_var):
            # Name and type are unchanged: reuse the original variable.
            var = orig_var
        elif tpe == DiscreteVariable:
            # Derive the values from the distinct non-missing entries and
            # replace every entry by its index in that list.
            values = list(
                str(i) for i in set(col_data) if not is_missing(i))
            var = tpe(name, values)
            col_data = [
                np.nan if is_missing(x) else values.index(str(x))
                for x in col_data
            ]
        elif tpe == StringVariable and type(orig_var) == DiscreteVariable:
            # Discrete -> string: use the value's text, "" for missing.
            var = tpe(name)
            col_data = [
                orig_var.repr_val(x) if not np.isnan(x) else ""
                for x in col_data
            ]
        else:
            var = tpe(name)
        places[place].append(var)
        cols[place].append(col_data)

    domain = Domain(attributes, class_vars, metas)
    # The column lists are transposed into row-major arrays.
    X = np.array(X).T if len(X) else np.empty((len(self.data), 0))
    y = np.array(y).T if len(y) else None
    # Metas need an object array as soon as one of them is a string variable.
    dtpe = object if any(
        isinstance(m, StringVariable) for m in domain.metas) else float
    m = np.array(m, dtype=dtpe).T if len(m) else None
    table = Table.from_numpy(domain, X, y, m, self.data.W)
    self.send("Data", table)
    self.apply_button.setEnabled(False)
def from_table(cls, domain, source, row_indices=...): """ Create a new table from selected columns and/or rows of an existing one. The columns are chosen using a domain. The domain may also include variables that do not appear in the source table; they are computed from source variables if possible. The resulting data may be a - new LazyTable if source is a LazyTable, domain contains only attributes of the source and row_indices is not specified. This should ensure that the SelectAttributes widget works. - a normal Table otherwise, which could apparently be view or a copy of the existing data. However, what happens with a view of growing data is unknown. :param domain: the domain for the new table :type domain: Orange.data.Domain :param source: the source table :type source: Orange.data.Table :param row_indices: indices of the rows to include :type row_indices: a slice or a sequence :return: a new table :rtype: Orange.data.Table """ # TODO: Improve the lazyness support for other cases? # TODO: Investigate this computing of new variables. subdomain = all(v in source.domain for v in domain) if isinstance(source, LazyTable) and subdomain: table_new = LazyTable.from_domain(domain) table_new.stop_pulling = True # Should only be done by first LazyTable? table_new.table_origin = source # Fill the table with the rows that were already materialized. # TODO: Do something smarter here? # Definitely, currently we need the copy.copy to prevent # RuntimeError: dictionary changed size during iteration for row_index_full in copy.copy(table_new.table_origin.row_mapping): for variable in table_new.domain: # pylint: disable=unused-variable value = table_new[row_index_full][variable] else: table_new = Table.from_table( domain=domain, source=source, row_indices=row_indices, ) return table_new
def retrieve(self, url):
    """Load and return the table behind `url`, advancing a progress bar.

    Returns None for an empty `url`, and also when loading fails; in the
    latter case the traceback is logged and an error is shown.
    """
    if not url:
        return

    n_before, n_after = 3, 7
    progress = gui.ProgressBar(self, 10)
    for _ in range(n_before):
        progress.advance()

    try:
        table = Table.from_url(url)
    except Exception as e:
        import traceback
        log.error(traceback.format_exc())
        log.error("Couldn't load spreadsheet %s: %s", url, e)
        self.error("Couldn't load spreadsheet. Ensure correct read permissions; rectangular, top-left aligned sheet data ...")
        return
    else:
        # Fill the rest of the bar before it is finished below.
        for _ in range(n_after):
            progress.advance()
    finally:
        progress.finish()

    return table
def apply_domain_edit(self):
    """Send a table rebuilt from the edited domain, or None if nothing is left."""
    table = None
    if self.data is not None:
        domain, cols = self.domain_editor.get_domain(
            self.data.domain, self.data)
        if domain.variables or domain.metas:
            attr_cols, class_cols, meta_cols = cols
            table = Table.from_numpy(
                domain, attr_cols, class_cols, meta_cols, self.data.W)
            # Carry the name, row ids and attributes over from the source.
            table.name = self.data.name
            table.ids = np.array(self.data.ids)
            table.attributes = getattr(self.data, 'attributes', {})

    self.Outputs.data.send(table)
    self.apply_button.setEnabled(False)
def apply_domain_edit(self):
    """Rebuild the data according to the edited variable list and send it.

    The rows of ``self.editor_model.variables`` are matched positionally
    with the original attributes, class variables and metas of
    ``self.data``. Kept columns are filed under the role given by `place`
    and converted where the requested type differs from the original.
    """
    attributes = []
    class_vars = []
    metas = []
    # `place` indexes this list: 0 = attributes, 1 = class vars, 2 = metas.
    places = [attributes, class_vars, metas]
    X, y, m = [], [], []
    cols = [X, y, m]  # Xcols, Ycols, Mcols

    def is_missing(x):
        # Missing values are those whose text is "nan" or empty.
        return str(x) in ("nan", "")

    for column, (name, tpe, place, vals, is_con), (orig_var, orig_plc) in \
        zip(count(), self.editor_model.variables,
            chain([(at, 0) for at in self.data.domain.attributes],
                  [(cl, 1) for cl in self.data.domain.class_vars],
                  [(mt, 2) for mt in self.data.domain.metas])):
        # NOTE(review): place 3 looks like the "skip" role; it has no entry
        # in `places` -- confirm.
        if place == 3:
            continue
        # Read the column as a flat list; original metas live in `.metas`.
        if orig_plc == 2:
            col_data = list(chain(*self.data[:, orig_var].metas))
        else:
            col_data = list(chain(*self.data[:, orig_var]))
        if name == orig_var.name and tpe == type(orig_var):
            # Unchanged name and type: keep the original variable.
            var = orig_var
        elif tpe == DiscreteVariable:
            # The distinct non-missing entries become the values; entries
            # are then stored as indices into that list.
            values = list(str(i) for i in set(col_data) if not is_missing(i))
            var = tpe(name, values)
            col_data = [np.nan if is_missing(x) else values.index(str(x))
                        for x in col_data]
        elif tpe == StringVariable and type(orig_var) == DiscreteVariable:
            # Discrete to string: store the value's text, "" when missing.
            var = tpe(name)
            col_data = [orig_var.repr_val(x) if not np.isnan(x) else ""
                        for x in col_data]
        else:
            var = tpe(name)
        places[place].append(var)
        cols[place].append(col_data)

    domain = Domain(attributes, class_vars, metas)
    # Column lists are transposed so that rows come first.
    X = np.array(X).T if len(X) else np.empty((len(self.data), 0))
    y = np.array(y).T if len(y) else None
    # A single string meta forces an object array for all metas.
    dtpe = object if any(isinstance(m, StringVariable)
                         for m in domain.metas) else float
    m = np.array(m, dtype=dtpe).T if len(m) else None
    table = Table.from_numpy(domain, X, y, m, self.data.W)
    self.send("Data", table)
    self.apply_button.setEnabled(False)
def insert_topics_into_corpus(self, corp_in):
    """
    Insert topical representation into corpus.

    :param corp_in: Corpus into which we want to insert topical
        representations
    :return: `Orange.data.table.Table`
    """
    dense = matutils.corpus2dense(self.corpus, num_terms=self.num_topics)
    matrix = dense.T

    # Generate the new table: one continuous attribute per topic, with the
    # class variables and metas taken over from the input corpus.
    topic_vars = [ContinuousVariable(name) for name in self.topic_names]
    source_domain = corp_in.domain
    domain = Domain(topic_vars, source_domain.class_vars,
                    metas=source_domain.metas)

    return Table.from_numpy(
        domain, matrix, Y=corp_in._Y, metas=corp_in.metas)
def get_all_topics_table(self):
    """Return the topics as a table with one row per word.

    Each topic is a continuous attribute; the word itself is stored in the
    'Word' meta column. Words are those of the first topic, sorted.
    """
    topic_words = self._topics_words(self.n_words)
    topic_weights = self._topics_weights(self.n_words)
    vocabulary = sorted(topic_words[0])
    n_topics = len(topic_words)

    # One row of weights per topic, ordered alphabetically by word, then
    # transposed so that the topics become the columns.
    per_topic = [
        [weight for _, weight in sorted(zip(words, weights))]
        for words, weights in zip(topic_words, topic_weights)
    ]
    X = np.array(per_topic).T

    # take only first n_topics; e.g. when user requested 10, but gensim
    # returns only 9 — when the rank is lower than num_topics requested
    topic_vars = [ContinuousVariable(name)
                  for name in self.topic_names[:n_topics]]

    topics_table = Table.from_numpy(
        Domain(topic_vars, metas=[StringVariable('Word')]),
        X=X,
        metas=np.array(vocabulary)[:, None])
    topics_table.name = 'All topics'
    return topics_table
def set_tree(self, model=None):
    """When a different tree is given.

    Clears the widget and stores `model`. When a model is given, the tree
    type is taken from the model's class variable, the view and its
    controls are refreshed, and any `meta_*` attributes present on the
    model are applied. The annotated-data signal is sent at the end.
    """
    self.clear()
    self.model = model

    if model is not None:
        # We need to know what kind of tree we have in order to properly
        # show colors and tooltips
        if model.domain.class_var.is_discrete:
            self.tree_type = self.CLASSIFICATION
        elif model.domain.class_var.is_continuous:
            self.tree_type = self.REGRESSION
        else:
            self.tree_type = self.GENERAL

        self.instances = model.instances
        # this bit is important for the regression classifier: when the
        # instances use another domain than the model, convert them
        if self.instances is not None and \
                self.instances.domain != model.domain:
            self.clf_dataset = Table.from_table(
                self.model.domain, self.instances)
        else:
            self.clf_dataset = self.instances

        self.tree_adapter = self._get_tree_adapter(self.model)
        # `_tree_specific` returns a callable looked up by name; it is
        # called immediately here.
        self.color_palette = self._tree_specific('_get_color_palette')()

        self.ptree.clear()
        self.ptree.set_tree(self.tree_adapter)
        self.ptree.set_tooltip_func(self._tree_specific('_get_tooltip'))
        self.ptree.set_node_color_func(
            self._tree_specific('_get_node_color')
        )

        self._tree_specific('_update_legend_colors')()
        self._update_legend_visibility()

        self._update_info_box()
        self._update_depth_slider()

        self._tree_specific('_update_target_class_combo')()

        self._update_main_area()

        # Get meta variables describing pythagoras tree if given from
        # forest.
        if hasattr(model, 'meta_size_calc_idx'):
            self.size_calc_idx = model.meta_size_calc_idx
        if hasattr(model, 'meta_size_log_scale'):
            self.size_log_scale = model.meta_size_log_scale
        # Updating the size calc redraws the whole tree
        if hasattr(model, 'meta_size_calc_idx') or \
                hasattr(model, 'meta_size_log_scale'):
            self.update_size_calc()
        # The target class can also be passed from the meta properties
        if hasattr(model, 'meta_target_class_index'):
            self.target_class_index = model.meta_target_class_index
            self.update_colors()
        # TODO this messes up the viewport in pythagoras tree viewer
        # it seems the viewport doesn't reset its size if this is applied
        # if hasattr(model, 'meta_depth_limit'):
        #     self.depth_limit = model.meta_depth_limit
        #     self.update_depth()

    # NOTE(review): assumed to run for a None model as well -- confirm
    # against the original indentation.
    self.send(ANNOTATED_DATA_SIGNAL_NAME, create_annotated_table(self.instances, None))
def received_table_load_votable(self, private_key, sender_id, msg_id, mtype,
                                parameters, extra):
    """
    Read the received VOTable and broadcast.

    The VOTable at ``parameters['url']`` is parsed, its first table is
    converted to an Orange table and stored in (or appended to)
    ``self.data``, which is then sent on the "Data" output. Columns whose
    name contains 'CLASS' become discrete class variables with the values
    'alpha' and 'beta'; all other columns become continuous attributes.

    All arguments except `parameters` are only printed.
    """
    print("Call:", private_key, sender_id, msg_id, mtype, parameters, extra)

    # Retrieve and read the VOTable.
    url_table = parameters['url']

    # sys.stdout is redirected by canvas.__main__ via redirect_stdout()
    # in canvas.util.redirect to an
    # Orange.canvas.application.outputview.TextStream object. This
    # has a @queued_blocking flush(), which can result in an "Result not
    # yet ready" RuntimeError from the QueuedCallEvent class.
    # This exception is raised because astropy.io.votable.table uses
    # astropy.utils.xml.iterparser, which uses astropy.utils.data,
    # which uses the Spinner class from astropy.utils.console, which
    # finally uses stdout to output a progress indicator.
    # Orange has its own mechanisms for indicating progress, so it would
    # perhaps be better to try to use that.
    # For now, the Orange redirect of stdout is temporarily disabled
    # while the votable is being parsed.
    stdout_orange = sys.stdout
    sys.stdout = sys.__stdout__
    try:
        votable_tree = votable.parse(url_table)
    finally:
        # Put the redirect back even when parsing fails.
        sys.stdout = stdout_orange
    print("VOTable Tree created")

    # astropy.io.votable.tree.Table
    votable_table = votable_tree.get_first_table()
    # astropy.table.table.Table
    table = votable_table.to_table()
    print("AstroPy table made")

    # Convert the VOTable columns to a Domain.
    attributes = [
        ContinuousVariable(name=column)
        for column in table.columns
        if 'CLASS' not in column
    ]
    class_vars = [
        DiscreteVariable(name=column, values=['alpha', 'beta'])
        for column in table.columns
        if 'CLASS' in column
    ]
    domain = Domain(
        attributes=attributes,
        class_vars=class_vars,
    )

    # Allocate the Orange table and copy the column data into it.
    otable = Table.from_domain(
        domain=domain,
        n_rows=len(table),
    )
    otable.stop_pulling = True
    print("Orange Table initialized")
    for i, variable in enumerate(otable.domain.attributes):
        otable.X[:, i] = table.columns[variable.name].data
    for variable in otable.domain.class_vars:
        # Class values are rounded before they are written; every class
        # column is written into all of Y.
        otable.Y[:] = table.columns[variable.name].data.round()
    print("Orange Table filled")

    if self.data is None:
        self.data = otable
    else:
        self.data.extend(otable)
        # TODO: Why doesn't appending row by row work?:
        #   for row in otable: self.data.append(row)

    self.send("Data", self.data)
    print("Orange Table send A")
def open_file(self, fn):
    """
    Load the data file `fn`, update the info labels and send the data.

    If `fn` does not exist but a file with the same base name exists in
    the current directory, that file is loaded instead. The placeholder
    name "(none)" clears the output and returns immediately.

    When loading fails, the current entry is removed from the file combo
    (and from `self.recent_paths` when it refers to the requested path),
    the error is displayed and None is sent. A failure whose message
    contains "is being loaded as" is retried once; when the retry
    succeeds the message is shown as a warning only and the data is sent
    as usual.
    """
    self.error()
    self.warning()
    self.information()

    fn_original = fn
    if not os.path.exists(fn):
        basename = os.path.basename(fn)
        local_path = os.path.join(".", basename)
        if os.path.exists(local_path):
            fn = local_path
            self.information("Loading '{}' from the current directory."
                             .format(basename))
    if fn == "(none)":
        self.send("Data", None)
        self.infoa.setText("No data loaded")
        self.infob.setText("")
        self.warnings.setText("")
        return

    self.loaded_file = ""

    data = None
    err_value = None
    try:
        # TODO handle self.new_variables
        data = Table(fn)
        self.loaded_file = fn
    except Exception as exc:
        err_value = str(exc)
        if "is being loaded as" in err_value:
            try:
                data = Table(fn)
                self.loaded_file = fn
            except Exception:
                data = None
            else:
                # The retry worked: downgrade the message to a warning so
                # the file is not reported (and dropped) as failed below.
                self.warning(0, err_value)
                err_value = None

    if err_value is not None:
        if fn.startswith("http"):
            err_value = "File '{}' does not contain valid data".format(
                os.path.basename(fn)
            )

        # Drop the failing entry from the combo but keep its text in the
        # line edit.
        ind = self.file_combo.currentIndex()
        text = self.file_combo.currentText()
        self.file_combo.removeItem(ind)
        self.file_combo.lineEdit().setText(text)
        if ind < len(self.recent_paths) and \
                self.recent_paths[ind].abspath == fn_original:
            del self.recent_paths[ind]
        self.error(err_value)
        self.infoa.setText('Data was not loaded due to an error.')
        self.infob.setText('Error:')
        self.warnings.setText(err_value)

    if data is None:
        self.dataReport = None
    else:
        domain = data.domain
        self.infoa.setText(
            "{} instance(s), {} feature(s), {} meta attribute(s)"
            .format(len(data), len(domain.attributes), len(domain.metas)))
        if domain.has_continuous_class:
            self.infob.setText("Regression; numerical class.")
        elif domain.has_discrete_class:
            self.infob.setText("Classification; " +
                               "discrete class with {} values."
                               .format(len(domain.class_var.values)))
        elif data.domain.class_vars:
            self.infob.setText("Multi-target; {} target variables."
                               .format(len(data.domain.class_vars)))
        else:
            self.infob.setText("Data has no target variable.")
        self.warnings.setText("")

        add_origin(data, fn)
        # Name the data after the file, without its last extension.
        data.name = os.path.basename(fn).rsplit(".", 1)[0]

        self.dataReport = self.prepareDataReport(data)
    self.send("Data", data)
def open_file(self, fn):
    """
    Load the data file `fn`, update the info labels and send the data.

    If `fn` does not exist but a file with the same base name exists in
    the current directory, that file is loaded instead. The placeholder
    name "(none)" clears the output and returns immediately.

    Any failure to load is displayed as an error (using the text of the
    exception) and None is sent. A failure whose message contains
    "is being loaded as" is retried once; when the retry succeeds the
    message is shown as a warning only and the data is sent as usual.
    """
    self.error()
    self.warning()
    self.information()

    if not os.path.exists(fn):
        basename = os.path.basename(fn)
        local_path = os.path.join(".", basename)
        if os.path.exists(local_path):
            fn = local_path
            self.information("Loading '{}' from the current directory."
                             .format(basename))
    if fn == "(none)":
        self.send("Data", None)
        self.infoa.setText("No data loaded")
        self.infob.setText("")
        self.warnings.setText("")
        return

    self.loaded_file = ""

    data = None
    err_value = None
    try:
        # TODO handle self.new_variables
        data = Table(fn)
        self.loaded_file = fn
    except Exception as exc:
        # Work with the message text: the labels expect strings, not
        # exception objects.
        err_value = str(exc)
        if "is being loaded as" in err_value:
            try:
                data = Table(fn)
                self.loaded_file = fn
            except Exception:
                data = None
            else:
                self.warning(0, err_value)
                err_value = None

    if err_value is not None:
        # Report every failure, not only a failed retry, so that a file
        # that cannot be read never fails silently.
        self.error(err_value)
        self.infoa.setText('Data was not loaded due to an error.')
        self.infob.setText('Error:')
        self.warnings.setText(err_value)

    if data is None:
        self.dataReport = None
    else:
        domain = data.domain
        self.infoa.setText(
            '{} instance(s), {} feature(s), {} meta attributes'
            .format(len(data), len(domain.attributes), len(domain.metas)))
        if isinstance(domain.class_var, ContinuousVariable):
            self.infob.setText('Regression; Numerical class.')
        elif isinstance(domain.class_var, DiscreteVariable):
            self.infob.setText('Classification; Discrete class with {} values.'
                               .format(len(domain.class_var.values)))
        elif data.domain.class_vars:
            self.infob.setText('Multi-target; {} target variables.'
                               .format(len(data.domain.class_vars)))
        else:
            self.infob.setText("Data has no target variable.")
        # Clear error text left over from an earlier failed load.
        self.warnings.setText("")

        addOrigin(data, fn)
        # Name the data after the file, without its last extension.
        data.name = os.path.basename(fn).rsplit(".", 1)[0]

        self.dataReport = self.prepareDataReport(data)
    self.send("Data", data)