def test_sparse_data(self):
    """Exercise the widget's sparse-input error messages.

    With method index 2 the ``sparse_methods`` error must appear for
    sparse input and disappear once the input is removed.  With method
    index 0 (asserted to be TSNE, GH 2158) sparse input must yield an
    output table, and setting the metric to 'chebyshev' must raise the
    ``sparse_tsne_distance`` error.
    """
    widget = self.widget

    sparse_iris = Table("iris")
    sparse_iris.X = sparse.csr_matrix(sparse_iris.X)
    self.assertTrue(sparse.issparse(sparse_iris.X))

    widget.manifold_method_index = 2
    self.send_signal(widget.Inputs.data, sparse_iris)
    widget.apply_button.button.click()
    self.assertTrue(widget.Error.sparse_methods.is_shown())

    self.send_signal(widget.Inputs.data, None)
    widget.apply_button.button.click()
    self.assertFalse(widget.Error.sparse_methods.is_shown())

    # GH 2158
    widget.manifold_method_index = 0
    selected_method = widget.MANIFOLD_METHODS[widget.manifold_method_index]
    self.assertEqual('TSNE', selected_method.__name__)

    self.send_signal(widget.Inputs.data, sparse_iris)
    widget.apply_button.button.click()
    self.assertFalse(widget.Error.sparse_methods.is_shown())
    self.assertFalse(widget.Error.sparse_tsne_distance.is_shown())
    transformed = self.get_output(widget.Outputs.transformed_data)
    self.assertIsInstance(transformed, Table)

    widget.params_widget.parameters['metric'] = 'chebyshev'
    widget.apply_button.button.click()
    self.assertTrue(widget.Error.sparse_tsne_distance.is_shown())
def __call__(self, data):
    """
    Randomize the given data and return the result as a new table.

    Parameters
    ----------
    data : Orange.data.Table
        A data table to be randomized.

    Returns
    -------
    data : Orange.data.Table
        Randomized data table.
    """
    randomized = Table(data)
    randomized.ensure_copy()

    # Each flag in ``rand_type`` selects one part of the table; the parts
    # are processed in the order Y, X, metas.
    parts = (
        (Randomize.RandomizeClasses, "Y"),
        (Randomize.RandomizeAttributes, "X"),
        (Randomize.RandomizeMetas, "metas"),
    )
    for flag, part in parts:
        if self.rand_type & flag:
            shuffled = self.randomize(getattr(randomized, part))
            setattr(randomized, part, shuffled)
    return randomized
def test_sparse(self):
    """The sparse_not_supported warning appears only after sparse input."""
    widget = self.widget

    sparse_iris = Table("iris")
    sparse_iris.X = sp.csr_matrix(sparse_iris.X)
    self.assertTrue(sp.issparse(sparse_iris.X))

    # Nothing has been sent yet, so the warning must still be hidden.
    self.assertFalse(widget.Warning.sparse_not_supported.is_shown())

    self.send_signal(widget.Inputs.data, sparse_iris)
    self.assertTrue(widget.Warning.sparse_not_supported.is_shown())
def test_sparse_data(self, timeout=DEFAULT_TIMEOUT):
    """Send sparse data plus a subset and check the subset index count."""
    widget = self.widget

    sparse_iris = Table("iris")
    sparse_iris.X = sp.csr_matrix(sparse_iris.X)
    self.assertTrue(sp.issparse(sparse_iris.X))

    self.send_signal(widget.Inputs.data, sparse_iris)
    if widget.isBlocking():
        # Wait for the blocking state to change before continuing.
        state_spy = QSignalSpy(widget.blockingStateChanged)
        self.assertTrue(state_spy.wait(timeout))

    every_30th = sparse_iris[::30]
    self.send_signal(widget.Inputs.data_subset, every_30th)
    self.assertEqual(len(widget.subset_indices), 5)
def test_sparse_data(self, timeout=DEFAULT_TIMEOUT):
    """Sparse input raises the sparse_data error; removing it clears it."""
    widget = self.widget

    sparse_iris = Table("iris")
    with sparse_iris.unlocked():
        sparse_iris.X = sp.csr_matrix(sparse_iris.X)
    self.assertTrue(sp.issparse(sparse_iris.X))

    self.send_signal(widget.Inputs.data, sparse_iris)
    self.assertTrue(widget.Error.sparse_data.is_shown())

    every_30th = sparse_iris[::30]
    self.send_signal(widget.Inputs.data_subset, every_30th)
    self.assertEqual(len(widget.subset_data), 5)

    self.send_signal(widget.Inputs.data, None)
    self.assertFalse(widget.Error.sparse_data.is_shown())
def test_sparse_data(self):
    """Sparse input disables normalization and non-sparse decompositions.

    After dense data is set again, every decomposition button and the
    normalize box must be enabled.
    """
    widget = self.widget

    sparse_iris = Table("iris")
    sparse_iris.X = sp.csr_matrix(sparse_iris.X)
    widget.set_data(sparse_iris)

    selected = DECOMPOSITIONS[widget.decomposition_idx]
    self.assertTrue(selected.supports_sparse)
    self.assertFalse(widget.normalize_box.isEnabled())

    buttons = widget.decomposition_box.group.box.buttons
    for index, method in enumerate(DECOMPOSITIONS):
        if method.supports_sparse:
            continue
        self.assertFalse(buttons[index].isEnabled())

    dense_iris = Table("iris")
    widget.set_data(dense_iris)
    self.assertTrue(all(button.isEnabled() for button in buttons))
    self.assertTrue(widget.normalize_box.isEnabled())
def test_save_uncompressed(self):
    """Save dense (and, where supported, sparse) iris with every filter.

    Each saved file is read back as a table and must contain 150 rows.
    """
    widget = self.widget
    widget.auto_save = False

    sparse_iris = Table("iris")
    sparse_iris.X = sp.csr_matrix(sparse_iris.X)

    for filter_name, writer in widget.filters.items():
        widget.write = writer
        suffix = writer.EXTENSIONS[0]
        with named_file("", suffix=suffix) as filename:
            # Bypass the file dialog: always answer with the temp file.
            dialog_result = (filename, filter_name)
            widget.get_save_filename = Mock(return_value=dialog_result)

            self.send_signal(widget.Inputs.data, self.iris)
            widget.save_file_as()
            self.assertEqual(len(Table(filename)), 150)

            if not writer.SUPPORT_SPARSE_DATA:
                continue
            self.send_signal(widget.Inputs.data, sparse_iris)
            widget.save_file()
            self.assertEqual(len(Table(filename)), 150)
def test_prepare_freeviz_data(self):
    """Call prepare_freeviz_data on regular, all-NaN and missing X.

    Only the all-NaN case has an asserted result: ``(None, None, None)``.
    The other two calls merely have to complete without raising.
    """
    iris = Table("iris")
    FreeViz.prepare_freeviz_data(iris)

    iris.X = iris.X * np.nan
    prepared = FreeViz.prepare_freeviz_data(iris)
    self.assertEqual(prepared, (None, None, None))

    iris.X = None
    FreeViz.prepare_freeviz_data(iris)
def test_column_filtering_sparse(self):
    """RemoveNaNColumns must return data equal to its sparse iris input."""
    sparse_iris = Table("iris")
    with sparse_iris.unlocked():
        sparse_iris.X = csr_matrix(sparse_iris.X)

    remove_nan_columns = RemoveNaNColumns()
    filtered = remove_nan_columns(sparse_iris)
    self.assertEqual(sparse_iris, filtered)
def test_sparse(self):
    """The sparse_data error appears only after sparse input is sent."""
    widget = self.widget

    sparse_iris = Table("iris")
    sparse_iris.X = sp.csr_matrix(sparse_iris.X)
    self.assertTrue(sp.issparse(sparse_iris.X))

    # Nothing has been sent yet, so the error must still be hidden.
    self.assertFalse(widget.Error.sparse_data.is_shown())

    self.send_signal(widget.Inputs.data, sparse_iris)
    self.assertTrue(widget.Error.sparse_data.is_shown())
def test_save_uncompressed(self):
    """Save dense (and, where supported, sparse) iris with every filter.

    For writers that expose ``read``, the saved file is read back and
    must contain 150 rows.
    """
    def check_row_count(file_format, path):
        # Formats without a ``read`` attribute cannot be verified here.
        if hasattr(file_format, "read"):
            self.assertEqual(len(file_format(path).read()), 150)

    widget = self.widget
    widget.auto_save = False

    sparse_iris = Table("iris")
    with sparse_iris.unlocked():
        sparse_iris.X = sp.csr_matrix(sparse_iris.X)

    for filter_name, writer in widget.get_filters().items():
        widget.write = writer
        suffix = writer.EXTENSIONS[0]
        with named_file("", suffix=suffix) as filename:
            # Bypass the file dialog: always answer with the temp file.
            dialog_result = (filename, filter_name)
            widget.get_save_filename = Mock(return_value=dialog_result)

            self.send_signal(widget.Inputs.data, self.iris)
            widget.save_file_as()
            check_row_count(writer, filename)

            if not writer.SUPPORT_SPARSE_DATA:
                continue
            self.send_signal(widget.Inputs.data, sparse_iris)
            widget.save_file()
            check_row_count(writer, filename)
def test_sparse_warning(self):
    """The sparse_data warning is hidden for dense and shown for sparse input."""
    widget = self.widget
    iris = Table("iris")

    self.send_signal("Data", iris)
    self.assertFalse(widget.Warning.sparse_data.is_shown())

    # Same table, now with a sparse X.
    iris.X = csr_matrix(iris.X)
    self.send_signal("Data", iris)
    self.assertTrue(widget.Warning.sparse_data.is_shown())
def test_sparse_data(self):
    """Sparse input raises the sparse_data error; removing it clears it."""
    widget = self.widget

    sparse_iris = Table("iris")
    sparse_iris.X = sp.csr_matrix(sparse_iris.X)
    self.assertTrue(sp.issparse(sparse_iris.X))

    self.send_signal(widget.Inputs.data, sparse_iris)
    self.assertTrue(widget.Error.sparse_data.is_shown())

    every_30th = sparse_iris[::30]
    self.send_signal(widget.Inputs.data_subset, every_30th)
    self.assertEqual(len(widget.subset_data), 5)

    self.send_signal(widget.Inputs.data, None)
    self.assertFalse(widget.Error.sparse_data.is_shown())
def test_sparse_data(self):
    """Applying on sparse input shows sparse_not_supported; no input clears it."""
    widget = self.widget

    sparse_iris = Table("iris")
    sparse_iris.X = sparse.csr_matrix(sparse_iris.X)
    self.assertTrue(sparse.issparse(sparse_iris.X))

    self.send_signal("Data", sparse_iris)
    widget.apply_button.button.click()
    self.assertTrue(widget.Error.sparse_not_supported.is_shown())

    self.send_signal("Data", None)
    widget.apply_button.button.click()
    self.assertFalse(widget.Error.sparse_not_supported.is_shown())
def test_sparse_subset_data(self):
    """
    Sparse data together with a sparse subset must be accepted.
    GH-2773
    """
    w = self.widget
    sparse_iris = Table("iris")
    sparse_iris.X = sp.csr_matrix(sparse_iris.X)

    self.send_signal(w.Inputs.data, sparse_iris)
    every_30th = sparse_iris[::30]
    self.send_signal(w.Inputs.data_subset, every_30th)

    self.assertEqual(len(w.graph.subset_indices), 5)
def test_mixed_features(self):
    """SelectBestFeatures(k=2) on mixed data keeps 2 continuous attributes.

    The number of discrete attributes must be the same before and after
    selection.
    """
    def count_attributes(table, var_type):
        # Number of attributes in the table's domain of the given type.
        return sum(1 for var in table.domain.attributes
                   if isinstance(var, var_type))

    data = Table('auto-mpg')
    data.X = Imputer().fit_transform(data.X)

    selector = SelectBestFeatures(method=UnivariateLinearRegression(), k=2)
    selected = selector(data)

    self.assertEqual(count_attributes(selected, ContinuousVariable), 2)
    self.assertEqual(count_attributes(selected, DiscreteVariable),
                     count_attributes(data, DiscreteVariable))
def test_sparse_data(self):
    """
    The sparse_not_supported warning follows sparse input being set/removed.
    GH-2298
    GH-2163
    """
    widget = self.widget

    sample = Table("iris")[::25]
    sample.X = sp.csr_matrix(sample.X)

    self.send_signal(widget.Inputs.data, sample)
    self.assertTrue(widget.Warning.sparse_not_supported.is_shown())

    self.send_signal(widget.Inputs.data, None)
    self.assertFalse(widget.Warning.sparse_not_supported.is_shown())
def test_mixed_features(self):
    """SelectBestFeatures(k=2) on mixed data keeps 2 continuous attributes.

    The number of discrete attributes must be the same before and after
    selection.
    """
    original = Table('auto-mpg')
    original.X = Imputer().fit_transform(original.X)

    select_best = SelectBestFeatures(
        method=UnivariateLinearRegression(), k=2)
    reduced = select_best(original)

    n_continuous = sum(
        1 for attr in reduced.domain.attributes
        if isinstance(attr, ContinuousVariable))
    self.assertEqual(n_continuous, 2)

    n_discrete_after = sum(
        1 for attr in reduced.domain.attributes
        if isinstance(attr, DiscreteVariable))
    n_discrete_before = sum(
        1 for attr in original.domain.attributes
        if isinstance(attr, DiscreteVariable))
    self.assertEqual(n_discrete_after, n_discrete_before)
def test_empty_groups(self):
    """Groups without any elements must not get a box."""
    widget = self.widget
    breaches = Table(test_filename("datasets/cyber-security-breaches.tab"))

    self.send_signal(widget.Inputs.data, breaches)
    self.__select_variable("US State")
    self.__select_group("US State")
    self.assertEqual(52, len(widget.boxes))

    # select rows with US State equal to TX or MO
    kept_rows = np.array([0, 1, 25, 26, 27])
    breaches.X = breaches.X[kept_rows]
    self.send_signal(widget.Inputs.data, breaches)
    self.assertEqual(2, len(widget.boxes))
def test_empty_groups(self):
    """Groups without any elements must not get a box."""
    widget = self.widget
    breaches = Table("cyber-security-breaches")

    self.send_signal(widget.Inputs.data, breaches)
    self.__select_variable("US State")
    self.__select_group("US State")
    self.assertEqual(52, len(widget.boxes))

    # select rows with US State equal to TX or MO
    kept_rows = np.array([0, 1, 25, 26, 27])
    breaches.X = breaches.X[kept_rows]
    self.send_signal(widget.Inputs.data, breaches)
    self.assertEqual(2, len(widget.boxes))
def test_sparse_data_regression(self):
    """
    Models built from dense and sparse housing data must be identical.
    GH-2497
    """
    dense_housing = Table("housing")
    self.send_signal("Data", dense_housing)
    model_dense = self.get_output("Model")

    sparse_housing = Table("housing")
    sparse_housing.X = sp.csr_matrix(sparse_housing.X)
    self.send_signal("Data", sparse_housing)
    model_sparse = self.get_output("Model")

    # pylint: disable=protected-access
    same_code = np.array_equal(model_dense._code, model_sparse._code)
    self.assertTrue(same_code)
    same_values = np.array_equal(model_dense._values, model_sparse._values)
    self.assertTrue(same_values)
def test_domain_edit_on_sparse_data(self):
    """Load a pickled sparse iris table and check the output stays sparse.

    The table is pickled to a temporary file, which is handed to the
    widget by path.  The output must be a Table with the same X shape as
    the input and a sparse X.
    """
    import os  # local import: used only to clean up the temporary file

    iris = Table("iris")
    iris.X = sp.csr_matrix(iris.X)

    # delete=False so the widget can reopen the file by name after the
    # handle is closed; removal is therefore registered explicitly so the
    # file does not outlive the test.
    with tempfile.NamedTemporaryFile(suffix='.pickle', delete=False) as f:
        self.addCleanup(os.remove, f.name)
        pickle.dump(iris, f)

    self.widget.add_path(f.name)
    self.widget.load_data()

    output = self.get_output(self.widget.Outputs.data)
    self.assertIsInstance(output, Table)
    self.assertEqual(iris.X.shape, output.X.shape)
    self.assertTrue(sp.issparse(output.X))
def test_sparse(self):
    """
    Sparse X and Y must give a sparse output with a five-variable domain.
    GH-2152
    GH-2157
    """
    iris = Table("iris")

    iris.X = sp.csr_matrix(iris.X)
    self.assertTrue(sp.issparse(iris.X))
    iris.Y = sp.csr_matrix(iris._Y)  # pylint: disable=protected-access
    self.assertTrue(sp.issparse(iris.Y))

    self.send_signal("Data", iris)
    first_30 = iris[:30]
    self.widget.set_subset_data(first_30)

    output = self.get_output("Data")
    self.assertTrue(output.is_sparse())
    self.assertEqual(len(output.domain), 5)
def test_sparse_data(self):
    """
    Dense input gives dense output; sparse input gives sparse output.
    GH-2160
    GH-2260
    """
    widget = self.widget
    iris = Table("iris")

    # Dense input: the discrete data keeps the full domain.
    self.send_signal(widget.Inputs.data, iris)
    self.assertEqual(len(widget.discrete_data.domain), len(iris.domain))
    dense_output = self.get_output("Data")
    self.assertFalse(dense_output.is_sparse())

    # Sparse input: the test expects a two-variable discrete domain.
    iris.X = sp.csr_matrix(iris.X)
    self.send_signal(widget.Inputs.data, iris)
    self.assertEqual(len(widget.discrete_data.domain), 2)
    sparse_output = self.get_output("Data")
    self.assertTrue(sparse_output.is_sparse())
def test_sparse_data(self):
    """Exercise the widget's sparse-input error messages.

    With method index 2 the ``sparse_methods`` error must appear for
    sparse input and disappear once the input is removed.  With method
    index 0 (asserted to be TSNE, GH 2158) sparse input must yield an
    output table, and setting the metric to 'chebyshev' must raise the
    ``sparse_tsne_distance`` error.
    """
    def click_apply():
        # Press the widget's apply button.
        self.widget.apply_button.button.click()

    sparse_table = Table("iris")
    sparse_table.X = sparse.csr_matrix(sparse_table.X)
    self.assertTrue(sparse.issparse(sparse_table.X))

    self.widget.manifold_method_index = 2
    self.send_signal(self.widget.Inputs.data, sparse_table)
    click_apply()
    self.assertTrue(self.widget.Error.sparse_methods.is_shown())

    self.send_signal(self.widget.Inputs.data, None)
    click_apply()
    self.assertFalse(self.widget.Error.sparse_methods.is_shown())

    # GH 2158
    self.widget.manifold_method_index = 0
    method_index = self.widget.manifold_method_index
    method_name = self.widget.MANIFOLD_METHODS[method_index].__name__
    self.assertEqual('TSNE', method_name)

    self.send_signal(self.widget.Inputs.data, sparse_table)
    click_apply()
    self.assertFalse(self.widget.Error.sparse_methods.is_shown())
    self.assertFalse(self.widget.Error.sparse_tsne_distance.is_shown())
    result = self.get_output(self.widget.Outputs.transformed_data)
    self.assertIsInstance(result, Table)

    self.widget.params_widget.parameters['metric'] = 'chebyshev'
    click_apply()
    self.assertTrue(self.widget.Error.sparse_tsne_distance.is_shown())
def test_sparse(self):
    """
    Merging must give equal tables for dense and sparse extra data.
    GH-2295
    GH-2155
    """
    primary = Table("iris")[::25]
    extra_dense = Table("titanic")[::300]
    extra_sparse = Table("titanic")[::300]
    extra_sparse.X = sp.csr_matrix(extra_sparse.X)

    self.send_signal("Data", primary)

    self.send_signal("Extra Data", extra_dense)
    merged_dense = self.get_output("Data")
    self.assertFalse(sp.issparse(merged_dense.X))
    self.assertFalse(merged_dense.is_sparse())

    self.send_signal("Extra Data", extra_sparse)
    merged_sparse = self.get_output("Data")
    self.assertTrue(sp.issparse(merged_sparse.X))
    self.assertTrue(merged_sparse.is_sparse())

    # Densify the sparse result so the two tables can be compared.
    merged_sparse.X = merged_sparse.X.toarray()
    self.assertTablesEqual(merged_dense, merged_sparse)
def test_str(self):
    """str() of a table with sparse X and Y must not raise."""
    iris = Table('iris')
    with iris.unlocked():
        # Build both sparse matrices before assigning either of them.
        sparse_x = csr_matrix(iris.X)
        sparse_y = csr_matrix(iris.Y)
        iris.X = sparse_x
        iris.Y = sparse_y
    str(iris)
def test_str(self):
    """str() of a table with sparse X and Y must not raise."""
    iris = Table('iris')
    # Build both sparse matrices before assigning either of them.
    sparse_x = csr_matrix(iris.X)
    sparse_y = csr_matrix(iris.Y)
    iris.X = sparse_x
    iris.Y = sparse_y
    str(iris)
def test_error_on_sparse_data(self):
    """Setting sparse data must show the sparse_data error."""
    widget = self.widget
    sparse_iris = Table('iris')
    sparse_iris.X = sp.csr_matrix(sparse_iris.X)

    widget.set_data(sparse_iris)

    self.assertTrue(widget.Error.sparse_data.is_shown())
def test_column_filtering_sparse(self):
    """RemoveNaNColumns must return data equal to its sparse iris input."""
    sparse_iris = Table("iris")
    sparse_iris.X = csr_matrix(sparse_iris.X)

    remove_nan_columns = RemoveNaNColumns()
    filtered = remove_nan_columns(sparse_iris)
    self.assertEqual(sparse_iris, filtered)
def _calculate_table_values(self):
    """Build the display matrix from the aggregated data and refresh the view.

    The aggregated values are optionally transposed, wrapped in a Table,
    imputed, optionally log-scaled and normalized, min-max scaled, and
    reordered.  The results are stored in ``self.matrix``,
    ``self.matrix_before_norm`` and ``self.clusters`` before the table,
    the selection and the output are updated.
    """
    gene_vars = self.data.domain.attributes[:self.GENE_MAXIMUM]
    values = self.aggregated_data
    cluster_labels = self.clusters_unordered
    if self.transpose:
        values, cluster_labels, gene_vars = self._transpose(
            values, cluster_labels, gene_vars)

    # create data table since imputation of nan values is required
    table = Table(Domain(gene_vars), values)
    table_before_norm = table.copy()  # for tooltip
    table = SklImpute()(table)

    if self.log_scale:
        table.X = np.log(table.X + 1)
    if self.normalize:
        table.X = self._normalize(table.X)

    # values must be in range [0, 1] for visualisation
    table.X = self._norm_min_max(table.X)

    if self.biclustering:
        row_order, column_order = self.cluster_data(table)
    else:
        # Keep rows and columns in their current order.
        row_order = np.arange(table.X.shape[0])
        column_order = np.arange(table.X.shape[1])

    # reorder
    self.matrix = table[row_order][:, column_order]
    self.matrix_before_norm = table_before_norm[row_order][:, column_order]
    self.clusters = cluster_labels[row_order]

    self._refresh_table()
    self._update_selection()
    self._invalidate()
def test_sparse(self):
    """
    Sparse X and Y must give a sparse output with a five-variable domain.
    GH-2152
    GH-2157
    """
    widget = self.widget
    iris = Table("iris")

    iris.X = sp.csr_matrix(iris.X)
    self.assertTrue(sp.issparse(iris.X))
    iris.Y = sp.csr_matrix(iris._Y)  # pylint: disable=protected-access
    self.assertTrue(sp.issparse(iris.Y))

    self.send_signal(widget.Inputs.data, iris)
    first_30 = iris[:30]
    widget.set_subset_data(first_30)

    output = self.get_output("Data")
    self.assertTrue(output.is_sparse())
    self.assertEqual(len(output.domain), 5)