def test_stats_sparse(self):
    """Check ``stats`` on a sparse identity matrix and on a row slice of it."""
    identity = csr_matrix(np.identity(5))
    full_row = [0, 1, 0.2, 0, 4, 1]
    # Every column of the identity matrix is expected to give the same row.
    np.testing.assert_equal(stats(identity), [full_row] * 5)

    # assure last two columns have just zero elements
    first_three = identity[:3]
    kept_row = [0, 1, 1 / 3, 0, 2, 1]
    zero_row = [0, 0, 0, 0, 3, 0]
    np.testing.assert_equal(
        stats(first_three),
        [kept_row] * 3 + [zero_row] * 2,
    )
def test_stats_weights(self):
    """Check ``stats`` with row weights on float data and on object data."""
    weights = np.array([1, 3])

    float_x = np.arange(4).reshape(2, 2).astype(float)
    expected = [
        [0, 2, 1.5, 0, 0, 2],
        [1, 3, 2.5, 0, 0, 2],
    ]
    np.testing.assert_equal(stats(float_x, weights), expected)

    # For an object array the weighted result is expected to match the
    # unweighted one.
    object_x = np.arange(4).reshape(2, 2).astype(object)
    np.testing.assert_equal(stats(object_x, weights), stats(object_x))
def test_stats_sparse(self):
    """
    Stats should not fail when trying to calculate mean on sparse data,
    and the sparse result should agree with the dense one.

    GH-2357
    """
    data = Table("iris")
    sparse_x = sp.csr_matrix(data.X)
    # Compare element-wise. The previous `a.all() == b.all()` check reduced
    # each result to a single boolean and would pass for almost any output.
    # NOTE(review): assumes the dense and sparse code paths report the same
    # counts for this data set (it should contain no zeros/NaNs) - confirm.
    np.testing.assert_allclose(stats(sparse_x), stats(data.X))
def test_stats(self):
    """Check ``stats`` on a dense array containing a NaN and on an empty slice."""
    dense = np.arange(4).reshape(2, 2).astype(float)
    dense[1, 1] = np.nan
    np.testing.assert_equal(
        stats(dense),
        [
            [0, 2, 1, 0, 0, 2],
            [1, 1, 1, 0, 1, 1],
        ],
    )

    # empty table should return ~like metas
    no_rows = dense[:0]
    empty_row = [np.inf, -np.inf, 0, 0, 0, 0]
    np.testing.assert_equal(stats(no_rows), [empty_row, empty_row])
def test_stats_sparse(self):
    """Verify ``stats`` output for a sparse identity matrix, full and truncated."""
    size = 5
    X = csr_matrix(np.identity(size))
    expected = [[0, 1, .2, 0, 4, 1] for _ in range(size)]
    np.testing.assert_equal(stats(X), expected)

    # assure last two columns have just zero elements
    truncated = X[:3]
    expected = [[0, 1, 1/3, 0, 2, 1]] * 3 + [[0, 0, 0, 0, 3, 0]] * 2
    np.testing.assert_equal(stats(truncated), expected)
def test_stats_sparse(self):
    """Every column of a sparse 5x5 identity should give the same ``stats`` row."""
    column_row = [0, 1, .2, 0, 4, 1]
    sparse_identity = csr_matrix(np.identity(5))
    np.testing.assert_equal(stats(sparse_identity), [column_row] * 5)
def test_coef_table_multiple(self):
    """Check the shape of the coefficient table for a model fitted on "zoo"."""
    classifier = LogisticRegressionLearner()(Table("zoo"))
    coef_table = create_coef_table(classifier)

    # stats() over the table's metas is expected to yield a single row.
    meta_stats = stats(coef_table.metas, None)
    self.assertEqual(1, len(meta_stats))

    domain = classifier.domain
    # One row per attribute plus one extra (presumably the intercept - confirm).
    self.assertEqual(len(coef_table), len(domain.attributes) + 1)
    # One entry per class value in each row.
    self.assertEqual(len(coef_table[0]), len(domain.class_var.values))
def test_coef_table_single(self):
    """Check the shape of the coefficient table for a model fitted on "titanic"."""
    classifier = LogisticRegressionLearner()(Table("titanic"))
    coef_table = create_coef_table(classifier)

    # stats() over the table's metas is expected to yield a single row.
    meta_stats = stats(coef_table.metas, None)
    self.assertEqual(1, len(meta_stats))

    # One row per attribute plus one extra (presumably the intercept - confirm).
    attribute_count = len(classifier.domain.attributes)
    self.assertEqual(len(coef_table), attribute_count + 1)
    # Each row is expected to hold exactly one entry.
    self.assertEqual(len(coef_table[0]), 1)
def test_stats_non_numeric(self):
    """Check ``stats`` on an object array of strings with one empty string per column."""
    rows = [
        ['', 'a', 'b'],
        ['a', '', 'b'],
        ['a', 'b', ''],
    ]
    X = np.array(rows, dtype=object)
    # Every column holds one empty string and two non-empty ones, so the
    # same row is expected for each of them.
    per_column = [np.inf, -np.inf, 0, 0, 1, 2]
    np.testing.assert_equal(stats(X), [per_column] * 3)
def test_stats_non_numeric(self):
    """Check ``stats`` on an object array mixing strings, NaN and integers."""
    rows = [
        ["", "a", np.nan, 0],
        ["a", "", np.nan, 1],
        ["a", "b", 0, 0],
    ]
    X = np.array(rows, dtype=object)

    one_missing = [np.inf, -np.inf, 0, 0, 1, 2]  # columns with one ""
    two_missing = [np.inf, -np.inf, 0, 0, 2, 1]  # column with two NaN
    none_missing = [np.inf, -np.inf, 0, 0, 0, 3]  # all-integer column
    np.testing.assert_equal(
        stats(X),
        [one_missing, one_missing, two_missing, none_missing],
    )
def test_stats_long_string_mem_use(self):
    """Guard against ``stats`` blowing up memory when one string is very long.

    Timing is used as a proxy: the run with a single very long string must
    not take much longer than the run with constant-length strings.
    """
    X = np.full((1000, 1000), "a", dtype=object)

    # perf_counter() is monotonic and high-resolution, unlike time.time(),
    # which can jump when the system clock is adjusted.
    start = time.perf_counter()
    stats(X)
    t_a = time.perf_counter() - start  # time for an array with constant-len strings

    # Add one very long string
    X[0, 0] = "a" * 2000

    # The implementation of stats() in Orange 3.30.2 used .astype("str")
    # internally. X.astype("str") would take ~1000x the memory of X,
    # because its type would be "<U2000" (the length of the longest string).
    # That is about 7.5 GiB of memory on a 64-bit Linux system.

    # Because it is hard to measure CPU, we measure time here instead, as
    # memory allocation of such big tables takes time. On Marko's
    # Linux system .astype("str") took ~3 seconds.
    start = time.perf_counter()
    stats(X)
    t_b = time.perf_counter() - start

    self.assertLess(t_b, 2 * t_a + 0.1)  # some grace period
def test_stats_weights_sparse(self):
    """Check ``stats`` with row weights on a sparse 2x2 matrix."""
    weights = np.array([1, 3])
    dense = np.arange(4).reshape(2, 2).astype(float)
    sparse = csr_matrix(dense)
    expected = [
        [0, 2, 1.5, 0, 1, 1],
        [1, 3, 2.5, 0, 0, 2],
    ]
    np.testing.assert_equal(stats(sparse, weights), expected)
def test_stats(self):
    """Check ``stats`` on a dense 2x2 float array with one NaN entry."""
    values = np.arange(4).reshape(2, 2).astype(float)
    values[1, 1] = np.nan
    expected = [
        [0, 2, 1, 0, 0, 2],
        [1, 1, 1, 0, 1, 1],
    ]
    np.testing.assert_equal(stats(values), expected)