def setUp(self):
    # Fixture: descriptors for three discrete variables (3, 3 and 5 values)
    # plus a fresh discrete colour-table model.
    variables = (
        DiscreteVariable("x", list("abc")),
        DiscreteVariable("y", list("def")),
        DiscreteVariable("z", list("ghijk")),
    )
    self.descs = list(map(owcolor.DiscAttrDesc, variables))
    self.model = owcolor.DiscColorTableModel()
def varcls_modified(self, name):
    # Start from the parent's modified variable and additionally override
    # the decimals and the date/time flags.
    modified = super().varcls_modified(name)
    modified.number_of_decimals = 5
    modified.have_date = 1
    modified.have_time = 1
    return modified


# Generated pickling test cases, one per variable type; each tuple pairs a
# case name with a factory producing the variable under test.
PickleContinuousVariable = create_pickling_tests(
    "PickleContinuousVariable",
    ("with_name", lambda: ContinuousVariable(name="Feature 0")),
)

PickleDiscreteVariable = create_pickling_tests(
    "PickleDiscreteVariable",
    ("with_name", lambda: DiscreteVariable(name="Feature 0")),
    (
        "with_str_value",
        lambda: DiscreteVariable(name="Feature 0", values=("F", "M")),
    ),
)

PickleStringVariable = create_pickling_tests(
    "PickleStringVariable",
    ("with_name", lambda: StringVariable(name="Feature 0")),
)


class VariableTestMakeProxy(unittest.TestCase):
    def test_make_proxy_disc(self):
        # A proxy, and a proxy of that proxy, must both compare equal to
        # the variable they were made from.
        original = DiscreteVariable("abc", values="abc")
        proxy = original.make_proxy()
        proxy_of_proxy = proxy.make_proxy()
        self.assertEqual(original, proxy)
        self.assertEqual(original, proxy_of_proxy)
def test_no_duplicated_values(self):
    # Re-adding a value that is already present must leave both the value
    # list and the value index unchanged.
    var = DiscreteVariable("foo", values=["a", "b", "c"])
    var.add_value("b")

    expected = ["a", "b", "c"]
    self.assertEqual(list(var.values), expected)
    self.assertEqual(list(var._value_index), expected)
def vars_from_df(df, role=None, force_nominal=False):
    """Derive variables and data arrays from a pandas DataFrame.

    Every column of ``df`` is mapped to one variable and filed under a role.
    Columns, conversion expressions and variables are collected in three
    parallel triples that are indexed by the role value.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame. If it carries ``orange_role`` / ``orange_variables``
        attributes they are used (see below).
    role : Role, optional
        Role assigned to every column. When None, ``df.orange_role`` is used
        if present; otherwise columns default to ``Role.Attribute`` and
        columns that fall through to the string branch go to ``Role.Meta``.
    force_nominal : bool
        Passed through to ``_is_discrete``.

    Returns
    -------
    xym : list
        Three entries (one per role slot), each an array, a sparse matrix
        or None.
    domain : Domain
        ``Domain(*vars_)`` built from the three per-role variable lists.

    Raises
    ------
    ValueError
        If a column ends up as a string variable while an explicit ``role``
        other than ``Role.Meta`` was requested.
    """
    if role is None and hasattr(df, 'orange_role'):
        role = df.orange_role
    df = _reset_index(df)

    # One list per role slot; `cols[_role]` etc. index these triples, so
    # Role values are presumably usable as 0/1/2 indices -- TODO confirm.
    cols = [], [], []
    exprs = [], [], []
    vars_ = [], [], []

    for column in df.columns:
        s = df[column]
        _role = Role.Attribute if role is None else role
        if hasattr(df, 'orange_variables') and column in df.orange_variables:
            # Reuse the variable remembered on the frame, dropping its
            # compute_value.
            original_var = df.orange_variables[column]
            var = original_var.copy(compute_value=None)
            expr = None
        elif _is_datetime(s):
            var = TimeVariable(str(column))
            expr = _convert_datetime
        elif _is_discrete(s, force_nominal):
            discrete = s.astype("category").cat
            var = DiscreteVariable(str(column),
                                   discrete.categories.astype(str).tolist())
            expr = to_categorical
        elif is_numeric_dtype(s):
            var = ContinuousVariable(
                # set number of decimals to 0 if int else keeps default behaviour
                str(column), number_of_decimals=(0 if is_integer_dtype(s) else None))
            expr = None
        else:
            # Anything else becomes a string variable, which is only
            # accepted as a meta.
            if role is not None and role != Role.Meta:
                raise ValueError("String variable must be in metas.")
            _role = Role.Meta
            var = StringVariable(str(column))
            expr = lambda s, _: np.asarray(s, dtype=object)

        cols[_role].append(column)
        exprs[_role].append(expr)
        vars_[_role].append(var)

    xym = []
    for a_vars, a_cols, a_expr in zip(vars_, cols, exprs):
        if not a_cols:
            # NOTE(review): `!=` compares list *contents*, so whenever the
            # first slot's column list is empty too, every empty slot gets
            # an (n, 0) array instead of None -- confirm this is intended
            # (an identity check would single out the first slot only).
            arr = None if a_cols != cols[0] else np.empty((df.shape[0], 0))
        elif not any(a_expr):
            # No column in this slot needs conversion.
            # if all c in columns table will share memory with dataframe
            a_df = df if all(c in a_cols for c in df.columns) else df[a_cols]
            if all(isinstance(a, SparseDtype) for a in a_df.dtypes):
                arr = csr_matrix(a_df.sparse.to_coo())
            else:
                arr = np.asarray(a_df)
        else:
            # we'll have to copy the table to resolve any expressions
            arr = np.array([
                expr(df[col], var) if expr else np.asarray(df[col])
                for var, col, expr in zip(a_vars, a_cols, a_expr)
            ]).T
        xym.append(arr)

    # Let the tables share memory with pandas frame
    # (a single-column second slot is flattened to one dimension)
    if xym[1] is not None and xym[1].ndim == 2 and xym[1].shape[1] == 1:
        xym[1] = xym[1][:, 0]

    return xym, Domain(*vars_)
class TestSqlTable(PostgresTest):
    """Tests of ``SqlTable`` run on top of the ``PostgresTest`` fixture.

    The tests rely on helpers and attributes supplied outside this class
    (``self.conn``, ``self.iris``, ``self.backend``, ``create_sql_table``,
    ``sql_table_from_data``, ``float_variable`` ...); their exact behaviour
    is not visible here.
    """

    def test_constructs_correct_attributes(self):
        # Three generated columns: float and discrete become domain
        # variables, the string column becomes the only meta.
        data = list(
            zip(self.float_variable(21), self.discrete_variable(21),
                self.string_variable(21)))
        with self.sql_table_from_data(data) as table:
            self.assertEqual(len(table.domain), 2)
            self.assertEqual(len(table.domain.metas), 1)

            float_attr, discrete_attr = table.domain.variables
            string_attr, = table.domain.metas

            self.assertIsInstance(float_attr, ContinuousVariable)
            self.assertEqual(float_attr.name, "col0")
            self.assertTrue('"col0"' in float_attr.to_sql())

            self.assertIsInstance(discrete_attr, DiscreteVariable)
            self.assertEqual(discrete_attr.name, "col1")
            self.assertTrue('"col1"' in discrete_attr.to_sql())
            self.assertEqual(discrete_attr.values, ['f', 'm'])

            self.assertIsInstance(string_attr, StringVariable)
            self.assertEqual(string_attr.name, "col2")
            self.assertTrue('"col2"' in string_attr.to_sql())

    def test_make_attributes(self):
        # Two tables over the same source must yield equal first variables.
        table1 = SqlTable(self.conn, self.iris)
        table2 = SqlTable(self.conn, self.iris)
        self.assertEqual(table1.domain[0], table2.domain[0])

    def test_len(self):
        with self.sql_table_from_data(zip(self.float_variable(26))) as table:
            self.assertEqual(len(table), 26)

        with self.sql_table_from_data(zip(self.float_variable(0))) as table:
            self.assertEqual(len(table), 0)

    def test_bool(self):
        # Truthiness: empty table is False, one-row table is True.
        with self.sql_table_from_data(()) as table:
            self.assertEqual(bool(table), False)
        with self.sql_table_from_data(zip(self.float_variable(1))) as table:
            self.assertEqual(bool(table), True)

    def test_len_with_filter(self):
        data = zip(self.discrete_variable(26))
        with self.sql_table_from_data(data) as table:
            self.assertEqual(len(table), 26)

            filtered_table = filter.SameValue(table.domain[0], 'm')(table)
            self.assertEqual(len(filtered_table), 13)

            # Add a value that occurs in no row; filtering on it must give
            # an empty result.
            table.domain[0].values.append('x')
            filtered_table = filter.SameValue(table.domain[0], 'x')(table)
            self.assertEqual(len(filtered_table), 0)

    def test_XY_small(self):
        # With col2 hinted as the class variable, X holds the first two
        # columns and Y the third.
        mat = np.random.randint(0, 2, (20, 3))
        conn, table_name = self.create_sql_table(mat)
        sql_table = SqlTable(conn, table_name,
                             type_hints=Domain([], DiscreteVariable(
                                 name='col2', values=['0', '1', '2'])))
        assert_almost_equal(sql_table.X, mat[:, :2])
        assert_almost_equal(sql_table.Y.flatten(), mat[:, 2])

    @unittest.mock.patch("Orange.data.sql.table.AUTO_DL_LIMIT", 100)
    def test_XY_large(self):
        # AUTO_DL_LIMIT is patched to 100, and the table is built with
        # 100 more rows than that limit.
        from Orange.data.sql.table import AUTO_DL_LIMIT as DLL
        mat = np.random.randint(0, 2, (DLL + 100, 3))
        conn, table_name = self.create_sql_table(mat)
        sql_table = SqlTable(conn, table_name,
                             type_hints=Domain([], DiscreteVariable(
                                 name='col2', values=['0', '1', '2'])))
        self.assertRaises(ValueError, lambda: sql_table.X)
        self.assertRaises(ValueError, lambda: sql_table.Y)
        with self.assertRaises(ValueError):
            sql_table.download_data(DLL + 10)
        # Download partial data
        sql_table.download_data(DLL + 10, partial=True)
        assert_almost_equal(sql_table.X, mat[:DLL + 10, :2])
        assert_almost_equal(sql_table.Y.flatten()[:DLL + 10], mat[:DLL + 10, 2])
        # Download all data
        sql_table.download_data()
        assert_almost_equal(sql_table.X, mat[:, :2])
        assert_almost_equal(sql_table.Y.flatten(), mat[:, 2])

    def test_download_data(self):
        mat = np.random.randint(0, 2, (20, 3))
        conn, table_name = self.create_sql_table(mat)
        # A fresh table per member: accessing any of them must yield data.
        for member in ('X', 'Y', 'metas', 'W', 'ids'):
            sql_table = SqlTable(conn, table_name,
                                 type_hints=Domain(
                                     [], DiscreteVariable(name='col2', values=['0', '1', '2'])))
            self.assertFalse(getattr(sql_table, member) is None)
        # has all necessary class members to create a standard Table
        Table(sql_table.domain, sql_table)

    def test_query_all(self):
        table = SqlTable(self.conn, self.iris, inspect_values=True)
        results = list(table)

        self.assertEqual(len(results), 150)

    def test_unavailable_row(self):
        table = SqlTable(self.conn, self.iris)
        self.assertRaises(IndexError, lambda: table[151])

    def test_query_subset_of_attributes(self):
        table = SqlTable(self.conn, self.iris)
        # The third mock attribute carries an SQL expression, not a column.
        attributes = [
            self._mock_attribute("sepal length"),
            self._mock_attribute("sepal width"),
            self._mock_attribute("double width", '2 * "sepal width"')
        ]
        results = list(table._query(attributes))

        self.assertSequenceEqual(results[:5],
                                 [(5.1, 3.5, 7.0),
                                  (4.9, 3.0, 6.0),
                                  (4.7, 3.2, 6.4),
                                  (4.6, 3.1, 6.2),
                                  (5.0, 3.6, 7.2)])

    def test_query_subset_of_rows(self):
        table = SqlTable(self.conn, self.iris)
        all_results = list(table._query())

        results = list(table._query(rows=range(10)))
        self.assertEqual(len(results), 10)
        self.assertSequenceEqual(results, all_results[:10])

        # NOTE(review): this stanza repeats the rows=range(10) check above
        # verbatim -- possibly meant to exercise a different row spec.
        results = list(table._query(rows=range(10)))
        self.assertEqual(len(results), 10)
        self.assertSequenceEqual(results, all_results[:10])

        results = list(table._query(rows=slice(None, 10)))
        self.assertEqual(len(results), 10)
        self.assertSequenceEqual(results, all_results[:10])

        results = list(table._query(rows=slice(10, None)))
        self.assertEqual(len(results), 140)
        self.assertSequenceEqual(results, all_results[10:])

    def test_getitem_single_value(self):
        table = SqlTable(self.conn, self.iris, inspect_values=True)
        self.assertAlmostEqual(table[0, 0], 5.1)

    def test_type_hints(self):
        table = SqlTable(self.conn, self.iris, inspect_values=True)
        self.assertEqual(len(table.domain), 5)
        self.assertEqual(len(table.domain.metas), 0)
        # Hinting "iris" as a string meta moves it out of the variables.
        table = SqlTable(self.conn, self.iris, inspect_values=True,
                         type_hints=Domain([], [], metas=[StringVariable("iris")]))
        self.assertEqual(len(table.domain), 4)
        self.assertEqual(len(table.domain.metas), 1)

    def test_joins(self):
        # The table is defined by a self-join query rather than a table name.
        table = SqlTable(self.conn, """SELECT a."sepal length", b. "petal length", CASE WHEN b."petal length" < 3 THEN '<' ELSE '>' END AS "qualitative petal length" FROM iris a INNER JOIN iris b ON a."sepal width" = b."sepal width" WHERE a."petal width" < 1 ORDER BY a."sepal length", b. 
"petal length" ASC""", type_hints=Domain([
            DiscreteVariable(name="qualitative petal length", values=['<', '>'])
        ], []))

        self.assertEqual(len(table), 498)
        self.assertAlmostEqual(list(table[497]), [5.8, 1.2, 0.])

    def _mock_attribute(self, attr_name, formula=None):
        # Returns a stand-in class exposing only `name` and `to_sql()`;
        # without a formula, the SQL is the double-quoted attribute name.
        if formula is None:
            formula = '"%s"' % attr_name

        class Attr:
            name = attr_name

            @staticmethod
            def to_sql():
                return formula

        return Attr

    def test_universal_table(self):
        # Only checks that constructing a SqlTable over the five-way
        # self-join does not raise; nothing is asserted on the result.
        _, table_name = self.construct_universal_table()

        SqlTable(
            self.conn, """ SELECT v1.col2 as v1, v2.col2 as v2, v3.col2 as v3, v4.col2 as v4, v5.col2 as v5 FROM %(table_name)s v1 INNER JOIN %(table_name)s v2 ON v2.col0 = v1.col0 AND v2.col1 = 2 INNER JOIN %(table_name)s v3 ON v3.col0 = v2.col0 AND v3.col1 = 3 INNER JOIN %(table_name)s v4 ON v4.col0 = v1.col0 AND v4.col1 = 4 INNER JOIN %(table_name)s v5 ON v5.col0 = v1.col0 AND v5.col1 = 5 WHERE v1.col1 = 1 ORDER BY v1.col0 """ % dict(table_name='"%s"' % table_name))

        self.drop_sql_table(table_name)

    def construct_universal_table(self):
        # Rows are (row, col, row * col) for row, col in 1..5.
        values = []
        for row in range(1, 6):
            for col in range(1, 6):
                values.extend((row, col, row * col))
        table = Table(np.array(values).reshape((-1, 3)))
        return self.create_sql_table(table)

    # Shared type hint for the "iris" column used by the tests below.
    IRIS_VARIABLE = DiscreteVariable(
        "iris", values=['Iris-setosa', 'Iris-virginica', 'Iris-versicolor'])

    def test_class_var_type_hints(self):
        iris = SqlTable(self.conn, self.iris,
                        type_hints=Domain([], self.IRIS_VARIABLE))

        self.assertEqual(len(iris.domain.class_vars), 1)
        self.assertEqual(iris.domain.class_vars[0].name, 'iris')

    def test_metas_type_hints(self):
        iris = SqlTable(self.conn, self.iris,
                        type_hints=Domain([], [], metas=[self.IRIS_VARIABLE]))

        self.assertEqual(len(iris.domain.metas), 1)
        self.assertEqual(iris.domain.metas[0].name, 'iris')

    def test_select_all(self):
        iris = SqlTable(self.conn, "SELECT * FROM iris",
                        type_hints=Domain([], self.IRIS_VARIABLE))

        self.assertEqual(len(iris.domain), 5)

    # --- Column type inference -------------------------------------------
    # Each test below creates a one-column table of the given SQL type and
    # checks which variable class is produced, first with
    # inspect_values=False and then with inspect_values=True.

    def test_discrete_bigint(self):
        table = np.arange(6).reshape((-1, 1))
        conn, table_name = self.create_sql_table(table, ['bigint'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, DiscreteVariable)

    def test_continous_bigint(self):
        table = np.arange(25).reshape((-1, 1))
        conn, table_name = self.create_sql_table(table, ['bigint'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

    def test_discrete_int(self):
        table = np.arange(6).reshape((-1, 1))
        conn, table_name = self.create_sql_table(table, ['int'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, DiscreteVariable)

    def test_continous_int(self):
        table = np.arange(25).reshape((-1, 1))
        conn, table_name = self.create_sql_table(table, ['int'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

    def test_discrete_smallint(self):
        table = np.arange(6).reshape((-1, 1))
        conn, table_name = self.create_sql_table(table, ['smallint'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, DiscreteVariable)

    def test_continous_smallint(self):
        table = np.arange(25).reshape((-1, 1))
        conn, table_name = self.create_sql_table(table, ['smallint'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

    def test_boolean(self):
        table = np.array(['F', 'T', 0, 1, 'False', 'True']).reshape(-1, 1)
        conn, table_name = self.create_sql_table(table, ['boolean'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, DiscreteVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, DiscreteVariable)

    def test_discrete_char(self):
        # Two distinct values: string meta without inspection, discrete
        # attribute with it.
        table = np.array(['M', 'F', 'M', 'F', 'M', 'F']).reshape(-1, 1)
        conn, table_name = self.create_sql_table(table, ['char(1)'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstMetaIsInstance(sql_table, StringVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, DiscreteVariable)

    def test_meta_char(self):
        # 23 distinct values: stays a string meta either way.
        table = np.array(list('ABCDEFGHIJKLMNOPQRSTUVW')).reshape(-1, 1)
        conn, table_name = self.create_sql_table(table, ['char(1)'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstMetaIsInstance(sql_table, StringVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstMetaIsInstance(sql_table, StringVariable)

    def test_discrete_varchar(self):
        table = np.array(['M', 'F', 'M', 'F', 'M', 'F']).reshape(-1, 1)
        conn, table_name = self.create_sql_table(table, ['varchar(1)'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstMetaIsInstance(sql_table, StringVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, DiscreteVariable)

    def test_meta_varchar(self):
        table = np.array(list('ABCDEFGHIJKLMNOPQRSTUVW')).reshape(-1, 1)
        conn, table_name = self.create_sql_table(table, ['varchar(1)'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstMetaIsInstance(sql_table, StringVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstMetaIsInstance(sql_table, StringVariable)

    def test_time_date(self):
        table = np.array([
            '2014-04-12', '2014-04-13', '2014-04-14', '2014-04-15', '2014-04-16'
        ]).reshape(-1, 1)
        conn, table_name = self.create_sql_table(table, ['date'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, TimeVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, TimeVariable)

    def test_time_time(self):
        table = np.array([
            '17:39:51', '11:51:48.46', '05:20:21.492149', '21:47:06', '04:47:35.8'
        ]).reshape(-1, 1)
        conn, table_name = self.create_sql_table(table, ['time'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, TimeVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, TimeVariable)

    def test_time_timetz(self):
        table = np.array([
            '17:39:51+0200', '11:51:48.46+01', '05:20:21.4921', '21:47:06-0600', '04:47:35.8+0330'
        ]).reshape(-1, 1)
        conn, table_name = self.create_sql_table(table, ['timetz'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, TimeVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, TimeVariable)

    def test_time_timestamp(self):
        table = np.array([
            '2014-07-15 17:39:51.348149', '2008-10-05 11:51:48.468149', '2008-11-03 05:20:21.492149', '2015-01-02 21:47:06.228149', '2016-04-16 04:47:35.892149'
        ]).reshape(-1, 1)
        conn, table_name = self.create_sql_table(table, ['timestamp'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, TimeVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, TimeVariable)

    def test_time_timestamptz(self):
        table = np.array([
            '2014-07-15 17:39:51.348149+0200', '2008-10-05 11:51:48.468149+02', '2008-11-03 05:20:21.492149+01', '2015-01-02 21:47:06.228149+0100', '2016-04-16 04:47:35.892149+0330'
        ]).reshape(-1, 1)
        conn, table_name = self.create_sql_table(table, ['timestamptz'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, TimeVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, TimeVariable)

    def test_double_precision(self):
        table = np.arange(25).reshape((-1, 1))
        conn, table_name = self.create_sql_table(table, ['double precision'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

    def test_numeric(self):
        table = np.arange(25).reshape((-1, 1))
        conn, table_name = self.create_sql_table(table, ['numeric(15, 2)'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

    def test_real(self):
        table = np.arange(25).reshape((-1, 1))
        conn, table_name = self.create_sql_table(table, ['real'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

    def test_serial(self):
        table = np.arange(25).reshape((-1, 1))
        conn, table_name = self.create_sql_table(table, ['serial'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

    @unittest.skipIf(sql_version < 90200,
                     "Type not supported on this server version.")
    def test_smallserial(self):
        table = np.arange(25).reshape((-1, 1))
        conn, table_name = self.create_sql_table(table, ['smallserial'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

    @unittest.skipIf(sql_version < 90200,
                     "Type not supported on this server version.")
    def test_bigserial(self):
        table = np.arange(25).reshape((-1, 1))
        conn, table_name = self.create_sql_table(table, ['bigserial'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

    def test_text(self):
        table = np.array(list('ABCDEFGHIJKLMNOPQRSTUVW')).reshape((-1, 1))
        conn, table_name = self.create_sql_table(table, ['text'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstMetaIsInstance(sql_table, StringVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstMetaIsInstance(sql_table, StringVariable)

    def test_other(self):
        # A type with no dedicated test above (uuid) ends up as a string meta.
        table = np.array([
            'bcd4d9c0-361e-bad4-7ceb-0d171cdec981', '544b7ddc-d861-0201-81c8-9f7ad0bbf531', 'b35a10f7-7901-f313-ec16-5ad9778040a6', 'b267c4be-4a26-60b5-e664-737a90a40e93'
        ]).reshape(-1, 1)
        conn, table_name = self.create_sql_table(table, ['uuid'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstMetaIsInstance(sql_table, StringVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstMetaIsInstance(sql_table, StringVariable)

        # Filtering that column for a value it does not contain gives
        # an empty result.
        filters = filter.Values(
            [filter.FilterString(-1, filter.FilterString.Equal, 'foo')])
        self.assertEqual(len(filters(sql_table)), 0)

    def test_recovers_connection_after_sql_error(self):
        conn, table_name = self.create_sql_table(
            np.arange(25).reshape((-1, 1)))
        sql_table = SqlTable(conn, table_name)

        # A failing query (BackendError tolerated) ...
        try:
            broken_query = "SELECT 1/%s FROM %s" % (
                sql_table.domain.attributes[0].to_sql(), sql_table.table_name)
            with sql_table.backend.execute_sql_query(broken_query) as cur:
                cur.fetchall()
        except BackendError:
            pass

        # ... must not prevent a subsequent valid query from running.
        working_query = "SELECT %s FROM %s" % (
            sql_table.domain.attributes[0].to_sql(), sql_table.table_name)
        with sql_table.backend.execute_sql_query(working_query) as cur:
            cur.fetchall()

    def test_basic_stats(self):
        iris = SqlTable(self.conn, self.iris, inspect_values=True)
        stats = BasicStats(iris, iris.domain['sepal length'])
        self.assertAlmostEqual(stats.min, 4.3)
        self.assertAlmostEqual(stats.max, 7.9)
        self.assertAlmostEqual(stats.mean, 5.8, 1)
        self.assertEqual(stats.nans, 0)
        self.assertEqual(stats.non_nans, 150)

        domain_stats = DomainBasicStats(iris, include_metas=True)
        self.assertEqual(len(domain_stats.stats),
                         len(iris.domain) + len(iris.domain.metas))
        stats = domain_stats['sepal length']
        self.assertAlmostEqual(stats.min, 4.3)
        self.assertAlmostEqual(stats.max, 7.9)
        self.assertAlmostEqual(stats.mean, 5.8, 1)
        self.assertEqual(stats.nans, 0)
        self.assertEqual(stats.non_nans, 150)

    @unittest.mock.patch("Orange.data.sql.table.LARGE_TABLE", 100)
    def test_basic_stats_on_large_data(self):
        # By setting LARGE_TABLE to 100, iris will be treated as
        # a large table and sampling will be used. As the table
        # is actually small, time-based sampling should return
        # all rows, so the same assertions can be used.
        self.test_basic_stats()

    def test_distributions(self):
        iris = SqlTable(self.conn, self.iris, inspect_values=True)

        dists = get_distributions(iris)
        self.assertEqual(len(dists), 5)
        dist = dists[0]
        self.assertAlmostEqual(dist.min(), 4.3)
        self.assertAlmostEqual(dist.max(), 7.9)
        self.assertAlmostEqual(dist.mean(), 5.8, 1)

    def test_contingencies(self):
        iris = SqlTable(self.conn, self.iris, inspect_values=True)
        # Two original attributes plus an EqualWidth-discretized
        # "sepal width", with "iris" as the class.
        iris.domain = Domain(
            iris.domain[:2] + (EqualWidth()(iris, iris.domain['sepal width']), ),
            iris.domain['iris'])

        conts = get_contingencies(iris)
        self.assertEqual(len(conts), 3)
        self.assertIsInstance(conts[0], Continuous)
        self.assertIsInstance(conts[1], Continuous)
        self.assertIsInstance(conts[2], Discrete)

    def test_pickling_restores_connection_pool(self):
        # The unpickled table must still be able to fetch rows.
        iris = SqlTable(self.conn, self.iris, inspect_values=True)
        iris2 = pickle.loads(pickle.dumps(iris))

        self.assertEqual(iris[0], iris2[0])

    def test_list_tables_with_schema(self):
        # Recreate a scratch schema with one two-row table ...
        with self.backend.execute_sql_query(
                "DROP SCHEMA IF EXISTS orange_tests CASCADE") as cur:
            cur.execute("CREATE SCHEMA orange_tests")
            cur.execute("CREATE TABLE orange_tests.efgh (id int)")
            cur.execute("INSERT INTO orange_tests.efgh (id) VALUES (1)")
            cur.execute("INSERT INTO orange_tests.efgh (id) VALUES (2)")

        # ... list it, open it, and always drop the schema afterwards.
        try:
            tables = self.backend.list_tables("orange_tests")
            self.assertTrue(any([t.name == "efgh" for t in tables]))
            SqlTable(self.conn, tables[0], inspect_values=True)
        finally:
            with self.backend.execute_sql_query(
                    "DROP SCHEMA IF EXISTS orange_tests CASCADE"):
                pass

    def assertFirstAttrIsInstance(self, table, variable_type):
        # Helper: the domain is non-empty and domain[0] has the given type.
        self.assertGreater(len(table.domain), 0)
        attr = table.domain[0]
        self.assertIsInstance(attr, variable_type)

    def assertFirstMetaIsInstance(self, table, variable_type):
        # Helper: there is at least one meta and domain[-1] has the given
        # type (presumably negative indices address metas -- confirm).
        self.assertGreater(len(table.domain.metas), 0)
        attr = table.domain[-1]
        self.assertIsInstance(attr, variable_type)
def send_data(self):
    """Send the annotated data and the centroid table for the current k.

    Both outputs receive None when there is no data or when no usable
    clustering is stored for the selected k (missing, or stored as a
    string). Otherwise the input data is extended with "Cluster" and
    "Silhouette" meta columns, and a second table with one row per centroid
    is built.
    """
    # Pick k: either the row selected among the optimized range, or the
    # fixed setting.
    if not self.optimize_k:
        k = self.k
    else:
        row = self.selected_row()
        k = self.k_from + row if row is not None else None

    km = self.clusterings.get(k)
    nothing_to_send = (
        self.data is None or km is None or isinstance(km, str))
    if nothing_to_send:
        self.Outputs.annotated_data.send(None)
        self.Outputs.centroids.send(None)
        return

    domain = self.data.domain
    cluster_names = ["C%d" % (x + 1) for x in range(km.k)]
    cluster_var = DiscreteVariable(
        get_unique_names(domain, "Cluster"), values=cluster_names)
    clust_ids = km.labels
    silhouette_var = ContinuousVariable(
        get_unique_names(domain, "Silhouette"))

    if len(self.data) > SILHOUETTE_MAX_SAMPLES:
        # Too many samples: warn and fill the scores with NaN.
        self.Warning.no_silhouettes()
        scores = np.nan
        clust_scores = np.full((km.k, 1), np.nan)
    else:
        self.Warning.no_silhouettes.clear()
        scores = self.samples_scores(clust_ids)
        # Mean sample score per cluster; an empty cluster scores 0.
        memberships = [clust_ids == i for i in range(km.k)]
        clust_scores = np.atleast_2d([
            np.mean(scores[members]) if members.any() else 0.
            for members in memberships
        ]).T

    extra_metas = [cluster_var, silhouette_var]
    new_table = self.data.transform(add_columns(domain, metas=extra_metas))
    new_table.get_column_view(cluster_var)[0][:] = clust_ids
    new_table.get_column_view(silhouette_var)[0][:] = scores

    # Where a model attribute is a ReplaceUnknowns wrapper around one of
    # the input attributes, use the input attribute itself.
    centroid_attributes = []
    for attr in km.domain.attributes:
        compute_value = attr.compute_value
        if isinstance(compute_value, ReplaceUnknowns) \
                and compute_value.variable in domain.attributes:
            attr = compute_value.variable
        centroid_attributes.append(attr)

    centroid_domain = add_columns(
        Domain(centroid_attributes, [], domain.metas), metas=extra_metas)
    # Metas of the centroid rows: NaN for the original metas, then the
    # cluster index and the cluster's score.
    centroid_metas = np.hstack((
        np.full((km.k, len(domain.metas)), np.nan),
        np.arange(km.k).reshape(km.k, 1),
        clust_scores,
    ))
    centroids = Table(centroid_domain, km.centroids, None, centroid_metas)
    centroids.name = (
        "centroids" if self.data.name == Table.name
        else f"{self.data.name} centroids")

    self.Outputs.annotated_data.send(new_table)
    self.Outputs.centroids.send(centroids)
def test_discrete(self):
    # Run the shared checks on a two-valued discrete variable.
    discrete = DiscreteVariable("D", values=("a", "b"))
    self._test_common(discrete)
is_inside = not is_inside return is_inside clusters = [None] * len(coordinates) for cluster, hull in hulls.items(): for i, c in enumerate(coordinates.X): if point_in_polygon_test(c, hull): clusters[i] = cluster if cluster_attribute is not None: assert all( i in cluster_attribute.values for i in set(clusters) - {None}), "cluster_attribute does not have all required values." # create the table new_domain = Domain([ DiscreteVariable("Clusters", values=sorted(list(hulls.keys()))) if cluster_attribute is None else cluster_attribute ]) return Table( new_domain, np.array(list(map(new_domain[0].to_val, clusters))).reshape(-1, 1)) if __name__ == "__main__": # run hull creation at Iris data data = Table("iris")[:, 2:4] clustered_data = Table( Domain([DiscreteVariable("cl", values=["1", "2", "3"])]), [[0]] * 50 + [[1]] * 50 + [[2]] * 50) compute_concave_hulls(data, clustered_data, epsilon=0.5)
def assign_labels(clusters, annotations, labels_per_cluster):
    """
    Assign the most common annotations to each cluster.

    Every item gets the annotation with the highest value in its row of
    `annotations` (items whose row is all-NaN get none). Each cluster is
    then described by its `labels_per_cluster` most common item
    annotations.

    Parameters
    ----------
    clusters : Orange.data.Table
        Cluster indices for each item.
    annotations : Orange.data.Table
        Table with annotations and their probabilities.
    labels_per_cluster : int
        Number of labels that need to be assigned to each cluster.

    Returns
    -------
    dict
        Dictionary with cluster index as a key and list of annotations as
        a value. Each list includes tuples with the annotation name and its
        proportion in the cluster (relative to all items of the cluster,
        including un-annotated ones). Clusters without any annotated item
        are omitted.
    Orange.data.Table
        Single-column table with the annotation assigned to each item
        (NaN where the item has no annotation).
    """
    clusters_unique = set(clusters.domain[0].values)

    if len(annotations.domain) == 0:
        # No annotations at all: an all-NaN column with no values.
        return {}, Table(
            Domain([DiscreteVariable("Annotation", values=[])]),
            np.ones((len(clusters), 1)) * np.nan)

    labels = np.array(list(map(str, annotations.domain.attributes)))

    # remove rows with all nans
    nan_mask = np.isnan(annotations.X).all(axis=1)
    ann_not_nan = annotations.X[~nan_mask]

    # find indices and labels
    annotation_best_idx = np.nanargmax(ann_not_nan, axis=1)
    annotation_best = labels[annotation_best_idx]

    # join back together
    # np.zeros rather than np.empty: entries of all-NaN rows are never
    # assigned below and are later recognised by comparing with "", so they
    # must be guaranteed-empty strings, not uninitialised memory.
    items_annotations = np.zeros(annotations.X.shape[0], dtype=labels.dtype)
    items_annotations[~nan_mask] = annotation_best

    # Cluster name of every item; identical for all clusters, so computed
    # once instead of once per loop iteration.
    item_clusters = np.array(
        list(map(clusters.domain.attributes[0].repr_val,
                 clusters.X[:, 0]))).flatten()

    annotations_clusters = {}
    for cl in clusters_unique:
        labels_cl = items_annotations[item_clusters == cl]
        # remove nans from labels
        labels_cl_filtered = labels_cl[~(labels_cl == "")]

        counts = Counter(labels_cl_filtered)
        common_labels = counts.most_common(labels_per_cluster)

        if len(common_labels) > 0:
            annotations_clusters[cl] = [(l, c / len(labels_cl))
                                        for l, c in common_labels]

    # pack item annotations to Table
    nan_mask = items_annotations == ""
    values, indices = np.unique(items_annotations[~nan_mask],
                                return_inverse=True)
    corrected_idx = np.ones(items_annotations.shape) * np.nan
    corrected_idx[~nan_mask] = indices
    domain = Domain([DiscreteVariable("Annotation", values=values)])
    item_annotations = Table(domain, corrected_idx.reshape((-1, 1)))

    return annotations_clusters, item_annotations
def test_nonunique(self):
    # Drives the widget through several key choices and join modes and
    # checks, after each step, which "non-unique" errors are shown and
    # whether an output is produced.
    widget = self.widget

    def expect_errors(left, right):
        shown_left = widget.Error.nonunique_left.is_shown()
        (self.assertTrue if left else self.assertFalse)(shown_left)
        shown_right = widget.Error.nonunique_right.is_shown()
        (self.assertTrue if right else self.assertFalse)(shown_right)

    def expect_output(present):
        output = self.get_output(widget.Outputs.data)
        if present:
            self.assertIsNotNone(output)
        else:
            self.assertIsNone(output)

    def match_on(*pairs):
        widget.attr_boxes.set_state(list(pairs))
        widget.unconditional_commit()

    x = ContinuousVariable("x")
    d = DiscreteVariable("d", values=list("abc"))
    domain = Domain([x, d], [])
    dataA = Table.from_numpy(
        domain, np.array([[1.0, 0], [1, 1], [2, 1]]))
    dataB = Table.from_numpy(
        domain, np.array([[1.0, 0], [2, 1], [3, 1]]))
    dataB.ids = dataA.ids
    self.send_signal(widget.Inputs.data, dataA)
    self.send_signal(widget.Inputs.extra_data, dataB)
    widget.merging = widget.InnerJoin
    expect_errors(left=False, right=False)

    # Matching by instance id
    match_on((INSTANCEID, INSTANCEID))
    expect_errors(left=False, right=False)
    expect_output(True)

    # Matching by row index
    match_on((INDEX, INDEX))
    expect_errors(left=False, right=False)
    expect_output(True)

    # Matching by x, inner join
    match_on((x, x))
    expect_errors(left=True, right=False)
    expect_output(False)

    # Same key, left join
    widget.merging = widget.LeftJoin
    widget.unconditional_commit()
    expect_errors(left=False, right=False)
    expect_output(True)

    # Matching by the pair (x, d), inner join
    widget.merging = widget.InnerJoin
    match_on((x, x), (d, d))
    expect_errors(left=False, right=False)
    expect_output(True)

    # Matching by d alone, inner join
    match_on((d, d))
    expect_errors(left=True, right=True)
    expect_output(False)

    # Same key, left join
    widget.merging = widget.LeftJoin
    widget.unconditional_commit()
    expect_errors(left=False, right=True)
    expect_output(False)

    # Back to inner join
    widget.merging = widget.InnerJoin
    widget.unconditional_commit()
    expect_errors(left=True, right=True)
    expect_output(False)

    # Removing both inputs clears the errors
    self.send_signal(widget.Inputs.data, None)
    self.send_signal(widget.Inputs.extra_data, None)
    expect_errors(left=False, right=False)
    expect_output(False)
VarDataPair = namedtuple('VarDataPair', ['variable', 'data']) # Continuous variable variations continuous_full = VarDataPair( ContinuousVariable('continuous_full'), np.array([0, 1, 2, 3, 4], dtype=float), ) continuous_missing = VarDataPair( ContinuousVariable('continuous_missing'), np.array([0, 1, 2, np.nan, 4], dtype=float), ) # Unordered discrete variable variations rgb_full = VarDataPair( DiscreteVariable('rgb_full', values=('r', 'g', 'b')), np.array([0, 1, 1, 1, 2], dtype=float), ) rgb_missing = VarDataPair( DiscreteVariable('rgb_missing', values=('r', 'g', 'b')), np.array([0, 1, 1, np.nan, 2], dtype=float), ) # Ordered discrete variable variations ints_full = VarDataPair( DiscreteVariable('ints_full', values=('2', '3', '4'), ordered=True), np.array([0, 1, 1, 1, 2], dtype=float), ) ints_missing = VarDataPair( DiscreteVariable('ints_missing', values=('2', '3', '4'), ordered=True), np.array([0, 1, 1, np.nan, 2], dtype=float),
def test_match_attr_name(self):
    """Check when choosing a data attribute auto-selects the extra one."""
    widget = self.widget
    first_row = widget.attr_boxes.rows[0]
    data_combo = first_row.left_combo
    extra_combo = first_row.right_combo

    domainA = Domain(
        [DiscreteVariable("dA1", ("a", "b", "c", "d")),
         DiscreteVariable("dA2", ("aa", "bb")),
         DiscreteVariable("dA3", ("aa", "bb"))],
        DiscreteVariable("cls", ("aaa", "bbb", "ccc")),
        [DiscreteVariable("mA1", ("cc", "dd")), StringVariable("mA2")])
    XA = np.array([[0, 0, 0], [1, 1, 0], [2, 0, 0], [3, 1, 0]])
    yA = np.array([0, 1, 2, np.nan])
    metasA = np.array([[0.0, "m1"], [1.0, "m2"], [np.nan, "m3"],
                       [0.0, "m4"]]).astype(object)

    domainB = Domain(
        [DiscreteVariable("dB1", values=("a", "b", "c")),
         ContinuousVariable("dA2")],
        None,
        [StringVariable("cls"), DiscreteVariable("dA1", ("m4", "m5"))])
    XB = np.array([[0, 0], [1, 1], [2, np.nan]])
    yB = np.empty((3, 0))
    metasB = np.array([[np.nan, np.nan], [1, 1], [0, 0]]).astype(object)

    dataA = Table(domainA, XA, yA, metasA)
    dataA.name = 'dataA'
    dataA.attributes = 'dataA attributes'
    dataB = Table(domainB, XB, yB, metasB)
    dataB.name = 'dataB'
    dataB.attributes = 'dataB attributes'

    self.send_signal(widget.Inputs.data, dataA)
    self.send_signal(widget.Inputs.extra_data, dataB)

    def extra_index_after(extra_index, data_index):
        """Set the extra combo, then the data combo; return the extra index."""
        extra_combo.setCurrentIndex(extra_index)
        extra_combo.activated.emit(extra_index)
        data_combo.setCurrentIndex(data_index)
        data_combo.activated.emit(data_index)
        return extra_combo.currentIndex()

    # match variable if available and the other combo is Row Index
    self.assertEqual(extra_index_after(0, 2), 5)

    # match variable if available and the other combo is ID
    self.assertEqual(extra_index_after(1, 2), 5)

    # don't match variable if other combo is set
    self.assertEqual(extra_index_after(4, 2), 4)

    # don't match if nothing to match to
    self.assertEqual(extra_index_after(0, 4), 0)

    # don't match numeric with non-numeric
    self.assertEqual(extra_index_after(0, 3), 0)

    # allow matching string with discrete
    self.assertEqual(extra_index_after(0, 5), 4)
def __get_pivot_tab_domain(self, val_var, X, X_h, X_v, X_t, agg_funs):
    """
    Build the four domains used by the pivot table output.

    The kind of the generated value columns depends on `val_var` and on
    the aggregation functions: time variables when `val_var` is a
    `TimeVariable` and every function is in `self.TimeVarFunctions`,
    continuous variables when there is no `val_var` or it is continuous
    (see the condition below), and discrete variables otherwise.

    Side effects: may rebind `self._row_var` and `self._col_var` to renamed
    copies and appends the original names of renamed variables to
    `self.renamed`. In the discrete case the arrays `X`, `X_h`, `X_v` and
    `X_t` are modified in place by `map_values`.

    Parameters
    ----------
    val_var : variable or None
        Variable whose values are aggregated; falsy means no such variable.
    X, X_h, X_v, X_t : array-like
        Arrays indexed as `arr[:, i]`; only read (and rewritten) in the
        discrete branch.
        NOTE(review): presumably the table body, header row, total column
        and grand total — confirm against the caller.
    agg_funs : iterable
        Aggregation functions; `str(f)` of each becomes a value of the
        'Aggregate' variable.

    Returns
    -------
    tuple of four Domain
        `(Domain([row_var, aggr] + attrs[0]),
          Domain([row_var_h, aggr] + attrs[1]),
          Domain(attrs[2]), Domain(attrs[3]))`
    """
    def map_values(index, _X):
        # Replace, in place, each entry of column `index` with the position
        # of its value among the column's unique values; the string "nan" is
        # dropped from the value list first, so "nan" entries stay as they are.
        values = np.unique(_X[:, index])
        values = np.delete(values, np.where(values == "nan")[0])
        for j, value in enumerate(values):
            _X[:, index][_X[:, index] == value] = j
        return values

    create_time_var = \
        isinstance(val_var, TimeVariable) and \
        all(fun in self.TimeVarFunctions for fun in agg_funs)
    # `and` binds tighter than `or`: this reads as
    # `not val_var or (val_var.is_continuous and (...))`
    create_cont_var = \
        not val_var or val_var.is_continuous and \
        (not isinstance(val_var, TimeVariable) or
         all(fun in self.FloatFunctions for fun in agg_funs))

    # names of the column variable's values, selected by the column groups
    vals = np.array(self._col_var.values)[self._col_var_groups.astype(int)]
    if create_time_var:
        kwargs = {
            "have_date": val_var.have_date,
            "have_time": val_var.have_time
        }
        # `[[...]] * 2` repeats a reference: attrs[0] and attrs[1] are the
        # same list object here (and likewise attrs[2] and attrs[3])
        attrs = [[TimeVariable(f"{v}", **kwargs) for v in vals]] * 2
        attrs.extend([[TimeVariable("Total", **kwargs)]] * 2)
    elif create_cont_var:
        # same aliasing as in the time-variable branch
        attrs = [[ContinuousVariable(f"{v}", 1) for v in vals]] * 2
        attrs.extend([[ContinuousVariable("Total", 1)]] * 2)
    else:
        # discrete case: each array gets its own list of variables, with
        # values taken from (and re-encoded in) the corresponding column;
        # columns are read starting at index 2
        attrs = []
        for x in (X, X_h):
            attrs.append([
                DiscreteVariable(f"{v}", map_values(i, x))
                for i, v in enumerate(vals, 2)
            ])
        for x in (X_v, X_t):
            attrs.append([DiscreteVariable("Total", map_values(0, x))])
    row_var_h = DiscreteVariable(self._row_var.name, values=["Total"])
    aggr_attr = DiscreteVariable('Aggregate', [str(f) for f in agg_funs])

    same_row_col = self._col_var is self._row_var

    # Make names unique across the row variable, the aggregate variable and
    # the value columns; idx 0 and 1 are the two extra variables, idx >= 2
    # are entries of attrs[0]
    extra_vars = [self._row_var, aggr_attr]
    uniq_a = get_unique_names_duplicates([v.name for v in extra_vars] +
                                         [atr.name for atr in attrs[0]])
    for (idx, var), u in zip(enumerate(chain(extra_vars, attrs[0])),
                             uniq_a):
        if var.name == u:
            continue
        if idx == 0:
            self.renamed.append(self._row_var.name)
            self._row_var = self._row_var.copy(name=u)
            if same_row_col:
                self._col_var = self._row_var
            row_var_h = row_var_h.copy(name=u)
        elif idx == 1:
            self.renamed.append(aggr_attr.name)
            aggr_attr = aggr_attr.copy(name=u)
        else:
            self.renamed.append(var.name)
            # when attrs[0] is attrs[1] (time/continuous branches) the second
            # assignment overwrites the copy stored by the first
            attrs[0][idx - 2] = var.copy(name=u)
            attrs[1][idx - 2] = var.copy(name=u)

    if same_row_col:
        # NOTE(review): the results of both `make` calls are discarded;
        # presumably they are called for a side effect — TODO confirm
        vals = tuple(v.name for v in attrs[0])
        self._row_var.make(self._row_var.name, values=vals)
        vals = tuple(v.name for v in attrs[2])
        row_var_h.make(row_var_h.name, vals)

    return (Domain([self._row_var, aggr_attr] + attrs[0]),
            Domain([row_var_h, aggr_attr] + attrs[1]),
            Domain(attrs[2]), Domain(attrs[3]))
def setUp(self):
    """Create a three-valued discrete variable and its colour descriptor."""
    variable = DiscreteVariable("x", ["a", "b", "c"])
    self.var = variable
    self.desc = owcolor.DiscAttrDesc(variable)
def setUp(self):
    """Create a domain of three discrete variables and a four-row table."""
    variables = [DiscreteVariable(name) for name in "abc"]
    rows = [
        [0, 1, 1],
        [1, 1, 1],
        [1, 0, 1],
        [1, 0, 0],
    ]
    self.domain = Domain(variables)
    self.data = Table(self.domain, rows)
def cluster_additional_points(coordinates, hulls, cluster_attribute=None):
    """
    Assign additional points to existing clusters using the clusters'
    concave hulls.

    A point that lies inside several hulls gets the cluster whose hull comes
    last in the iteration order of `hulls`; a point inside no hull gets no
    cluster (`None` is passed to the variable's `to_val`).

    Parameters
    ----------
    coordinates : Orange.data.Table
        Visualisation coordinates - embeddings
    hulls : dict
        Concave hull for each cluster
    cluster_attribute : Orange.data.DiscreteVariable (optional)
        A variable for clusters. If cluster_attribute is provided it will be
        used in the creation of the resulting Table.

    Returns
    -------
    Orange.data.Table
        Cluster label for each point
    """

    def point_in_polygon_test(test_point, polygon_points):
        """
        Horizontal ray casting: count how many polygon edges are crossed by
        the ray going from test_point towards +x. An odd number of crossings
        means that the point is inside the polygon.
        https://stackoverflow.com/a/2922778/3551700
        """
        px, py = test_point[0], test_point[1]
        # each vertex paired with its successor; the last wraps to the first
        successors = np.concatenate(
            (polygon_points[1:], polygon_points[:1]), axis=0)
        # toggling the flag on every crossing tracks the parity of crossings
        inside = False
        for (x1, y1), (x2, y2) in zip(polygon_points, successors):
            if (y1 > py) == (y2 > py):
                # both end points on the same side: the ray misses this edge
                continue
            # x coordinate where the edge meets the horizontal line y == py
            crossing_x = (x2 - x1) * (py - y1) / (y2 - y1) + x1
            if px < crossing_x:
                inside = not inside
        return inside

    clusters = [None] * len(coordinates)
    for cluster, hull in hulls.items():
        for row, point in enumerate(coordinates.X):
            if point_in_polygon_test(point, hull):
                clusters[row] = cluster

    if cluster_attribute is not None:
        assigned = set(clusters)
        assigned.discard(None)
        assert all(
            name in cluster_attribute.values for name in assigned
        ), "cluster_attribute does not have all required values."

    # create the table
    if cluster_attribute is None:
        cluster_var = DiscreteVariable("Clusters", values=sorted(hulls))
    else:
        cluster_var = cluster_attribute
    new_domain = Domain([cluster_var])
    to_val = new_domain[0].to_val
    codes = np.array([to_val(cluster) for cluster in clusters])
    return Table(new_domain, codes.reshape(-1, 1))
import warnings from unittest import TestCase from unittest.mock import Mock from Orange.data import Domain, DiscreteVariable from Orange.data import ContinuousVariable from Orange.util import OrangeDeprecationWarning from Orange.widgets.settings import DomainContextHandler, ContextSetting from Orange.widgets.utils import vartype Continuous = vartype(ContinuousVariable()) Discrete = vartype(DiscreteVariable()) class TestDomainContextHandler(TestCase): def setUp(self): self.domain = Domain( attributes=[ContinuousVariable('c1'), DiscreteVariable('d1', values='abc'), DiscreteVariable('d2', values='def')], class_vars=[DiscreteVariable('d3', values='ghi')], metas=[ContinuousVariable('c2'), DiscreteVariable('d4', values='jkl')] ) self.args = (self.domain, {'c1': Continuous, 'd1': Discrete, 'd2': Discrete, 'd3': Discrete}, {'c2': Continuous, 'd4': Discrete, }) self.handler = DomainContextHandler() self.handler.read_defaults = lambda: None def test_encode_domain_with_match_none(self):
def table_from_frame(df, class_name, *, force_nominal=False):
    """
    Convert pandas.DataFrame to Orange.data.Table

    Parameters
    ----------
    df : pandas.DataFrame
    class_name : str
        Name of the column to use as the class variable. It is compared
        with `str(column name)`; the matching column is always converted
        to a DiscreteVariable. If no column matches, the table has no class.
    force_nominal : boolean
        If True, interpret ALL string columns as nominal (DiscreteVariable).

    Returns
    -------
    Table
    """
    def _is_discrete(s):
        # categorical columns, and object columns that either are forced to
        # be nominal or have few distinct values relative to their length
        return (is_categorical_dtype(s) or
                is_object_dtype(s) and (force_nominal or
                                        s.nunique() < s.size**.666))

    def _is_datetime(s):
        if is_datetime64_any_dtype(s):
            return True
        try:
            if is_object_dtype(s):
                pd.to_datetime(s, infer_datetime_format=True)
                return True
        except Exception:  # pylint: disable=broad-except
            # deliberate best effort: anything unparsable is not a datetime
            pass
        return False

    # If df index is not a simple RangeIndex (or similar), put it into data
    if not (df.index.is_integer() and
            (df.index.is_monotonic_increasing or
             df.index.is_monotonic_decreasing)):
        df = df.reset_index()

    attrs, class_vars, metas = [], [], []
    # Class columns are gathered separately from attribute columns. Mixing
    # them in X in dataframe order would misalign the data with the domain
    # (which lists attributes first, then class variables) whenever the
    # class column is not the last non-meta column of the frame.
    X, Y, M = [], [], []

    # Iter over columns
    for name, s in df.items():
        name = str(name)
        if name == class_name:
            discrete = s.astype('category').cat
            class_vars.append(
                DiscreteVariable(name,
                                 discrete.categories.astype(str).tolist()))
            Y.append(discrete.codes.replace(-1, np.nan).values)
        elif _is_discrete(s):
            discrete = s.astype('category').cat
            attrs.append(
                DiscreteVariable(name,
                                 discrete.categories.astype(str).tolist()))
            # code -1 marks a missing value
            X.append(discrete.codes.replace(-1, np.nan).values)
        elif _is_datetime(s):
            tvar = TimeVariable(name)
            attrs.append(tvar)
            s = pd.to_datetime(s, infer_datetime_format=True)
            X.append(
                s.astype('str').replace('NaT', np.nan).map(tvar.parse).values)
        elif is_numeric_dtype(s):
            attrs.append(ContinuousVariable(name))
            X.append(s.values)
        else:
            metas.append(StringVariable(name))
            M.append(s.values.astype(object))

    return Table.from_numpy(
        Domain(attrs, class_vars, metas),
        np.column_stack(X) if X else np.empty((df.shape[0], 0)),
        np.column_stack(Y) if Y else None,
        np.column_stack(M) if M else None)
def test_value_from_discrete_substring(self):
    """The lookup table maps each value of the variable to a pattern index."""
    variable = DiscreteVariable("x", values=self.arr)
    transformation = ValueFromDiscreteSubstring(variable, self.patterns)
    expected_lookup = [0, 1, 2, 0, 3]
    np.testing.assert_equal(transformation.lookup_table, expected_lookup)
from orangewidget.widget import StateInfo from Orange.data import Table, ContinuousVariable, DiscreteVariable, Domain from Orange.widgets.settings import ContextSetting from Orange.widgets.utils import vartype from Orange.widgets.utils.state_summary import format_summary_details from Orange.widgets.tests.base import WidgetTest from Orange.widgets.data.owselectcolumns \ import OWSelectAttributes, VariablesListItemModel, \ SelectAttributesDomainContextHandler from Orange.widgets.data.owrank import OWRank from Orange.widgets.widget import AttributeList Continuous = vartype(ContinuousVariable("c")) Discrete = vartype(DiscreteVariable("d")) class TestSelectAttributesDomainContextHandler(TestCase): def setUp(self): self.domain = Domain(attributes=[ ContinuousVariable('c1'), DiscreteVariable('d1', values='abc'), DiscreteVariable('d2', values='def') ], class_vars=[DiscreteVariable('d3', values='ghi')], metas=[ ContinuousVariable('c2'), DiscreteVariable('d4', values='jkl') ]) self.args = (self.domain, {
def test_discrete_rename(self):
    """Renaming categories yields new values computed from the original."""
    source = DiscreteVariable("D", values=("a", "b"))
    rename = CategoriesMapping((("a", "A"), ("b", "B")))
    renamed = apply_transform_var(source, [rename])
    self.assertSequenceEqual(renamed.values, ["A", "B"])
    self.assertIs(renamed.compute_value.variable, source)
class TestSparseTablePandas(TestTablePandas):
    """Run the pandas table tests on a table with sparse X and Y."""

    features = (
        ContinuousVariable(name="c2"),
        ContinuousVariable(name="Continuous Feature 2"),
        DiscreteVariable(name="d1", values=("0", "1")),
        DiscreteVariable(name="Discrete Feature 2",
                         values=("value1", "value2")),
    )

    class_vars = (
        ContinuousVariable(name="Continuous Class"),
        DiscreteVariable(name="Discrete Class", values=("m", "f")),
    )

    feature_data = (
        (1, 0, 0, 0),
        (0, 1, 0, 0),
        (0, 1, 1, 0),
        (0, 0, 0, 0),
        (0, 1, 1, 0),
        (0, 0, 0, 0),
        (0, 1, 1, 0),
    )

    class_data = (
        (1, 0),
        (0, 1),
        (1, 0),
        (0, 1),
        (1, 0),
        (0, 1),
        (1, 0),
    )

    def setUp(self):
        """Build the sparse table and the array-comparison helper."""
        self.domain = Domain(attributes=self.features,
                             class_vars=self.class_vars)
        # go through a dense table first, then wrap its arrays as CSR
        dense = Table.from_numpy(
            self.domain,
            np.array(self.feature_data),
            np.array(self.class_data),
        )
        weights = np.array([1, 0, 1, 0, 1, 1, 1])
        self.table = Table.from_numpy(
            self.domain, csr_matrix(dense.X), csr_matrix(dense.Y), W=weights)

        def arreq(t1, t2):
            # Two sparse operands are compared with an assertion (which
            # returns None); anything else with np.array_equal (a bool).
            if sp.issparse(t1) and sp.issparse(t2):
                return self.assertEqual((t1 != t2).nnz, 0)
            return np.array_equal(t1, t2)

        self.__arreq__ = arreq

    def test_to_dense(self):
        """Densifying the data frame keeps its metadata and its contents."""
        sparse_df = self.table.X_df
        self.assertIsInstance(sparse_df, OrangeDataFrame)

        dense_df = sparse_df.sparse.to_dense()
        for attr in ("index", "orange_variables", "orange_attributes",
                     "orange_role", "orange_weights"):
            np.testing.assert_array_equal(getattr(sparse_df, attr),
                                          getattr(dense_df, attr))

        expected = self.table.to_dense()
        converted = dense_df.to_orange_table()
        for attr in ("X", "ids", "W", "attributes"):
            np.testing.assert_array_equal(getattr(converted, attr),
                                          getattr(expected, attr))
def test_init(self):
    """The feature passed to the constructor is stored unchanged."""
    fold_var = DiscreteVariable(name="fold", values="abc")
    cv_feature = CrossValidationFeature(feature=fold_var)
    self.assertIs(cv_feature.feature, fold_var)
output_csv.getvalue().splitlines()) def test_repr_value(self): # https://github.com/biolab/orange3/pull/1760 var = TimeVariable('time') self.assertEqual(var.repr_val(Value(var, 416.3)), '416.3') PickleContinuousVariable = create_pickling_tests( "PickleContinuousVariable", ("with_name", lambda: ContinuousVariable(name="Feature 0")), ) PickleDiscreteVariable = create_pickling_tests( "PickleDiscreteVariable", ("with_name", lambda: DiscreteVariable(name="Feature 0")), ("with_int_values", lambda: DiscreteVariable(name="Feature 0", values=[1, 2, 3])), ("with_str_value", lambda: DiscreteVariable(name="Feature 0", values=["F", "M"])), ("ordered", lambda: DiscreteVariable( name="Feature 0", values=["F", "M"], ordered=True)), ("with_base_value", lambda: DiscreteVariable( name="Feature 0", values=["F", "M"], base_value=0))) PickleStringVariable = create_pickling_tests( "PickleStringVariable", ("with_name", lambda: StringVariable(name="Feature 0"))) @variabletest(DiscreteVariable)
import warnings from distutils.version import LooseVersion from unittest import TestCase from unittest.mock import Mock import Orange from Orange.data import Domain, DiscreteVariable from Orange.data import ContinuousVariable from Orange.util import OrangeDeprecationWarning from Orange.widgets.settings import DomainContextHandler, ContextSetting from Orange.widgets.utils import vartype Continuous = 100 + vartype(ContinuousVariable("x")) Discrete = 100 + vartype(DiscreteVariable("x")) class TestDomainContextHandler(TestCase): def setUp(self): self.domain = Domain(attributes=[ ContinuousVariable('c1'), DiscreteVariable('d1', values='abc'), DiscreteVariable('d2', values='def') ], class_vars=[DiscreteVariable('d3', values='ghi')], metas=[ ContinuousVariable('c2'), DiscreteVariable('d4', values='jkl') ]) self.args = (self.domain, { 'c1': Continuous - 100, 'd1': Discrete - 100,
continuous_all_missing = VarDataPair( ContinuousVariable('continuous_all_missing'), np.array([np.nan] * 5, dtype=float), ) continuous_same = VarDataPair( ContinuousVariable('continuous_same'), np.array([3] * 5, dtype=float), ) continuous = [ continuous_full, continuous_missing, continuous_all_missing, continuous_same ] # Unordered discrete variable variations rgb_full = VarDataPair( DiscreteVariable('rgb_full', values=['r', 'g', 'b']), np.array([0, 1, 1, 1, 2], dtype=float), ) rgb_missing = VarDataPair( DiscreteVariable('rgb_missing', values=['r', 'g', 'b']), np.array([0, 1, 1, np.nan, 2], dtype=float), ) rgb_all_missing = VarDataPair( DiscreteVariable('rgb_all_missing', values=['r', 'g', 'b']), np.array([np.nan] * 5, dtype=float), ) rgb_bins_missing = VarDataPair( DiscreteVariable('rgb_bins_missing', values=['r', 'g', 'b']), np.array([np.nan, 1, 1, 1, np.nan], dtype=float), ) rgb_same = VarDataPair(
def test_no_nonstringvalues(self):
    """Non-string values are rejected at construction and by add_value."""
    with self.assertRaises(TypeError):
        DiscreteVariable("foo", values=("a", 42))

    variable = DiscreteVariable("foo", values=("a", "b", "c"))
    with self.assertRaises(TypeError):
        variable.add_value(42)
def test_colors_diff_domain(self):
    """
    Test whether the color selection for values is correct.
    """
    # pylint: disable=protected-access
    self.send_signal(self.widget.Inputs.data, self.iris)
    idom = self.iris.domain

    def domain_with_class_values(values):
        """Iris attributes with a fresh class variable having `values`."""
        return Domain(
            idom.attributes,
            DiscreteVariable(idom.class_var.name, values))

    def colors_for_predictors(data1, data2):
        """Fit constant models on both tables, send them, return colors."""
        predictor1 = ConstantLearner()(data1)
        predictor2 = ConstantLearner()(data2)
        self.send_signal(self.widget.Inputs.predictors, predictor1)
        self.send_signal(self.widget.Inputs.predictors, predictor2, 1)
        return self.widget._get_colors()

    # case 1: two domains one subset other
    dom1 = domain_with_class_values(idom.class_var.values)
    dom2 = domain_with_class_values(idom.class_var.values[:2])
    iris1 = self.iris[:100].transform(dom1)
    iris2 = self.iris[:100].transform(dom2)
    colors = colors_for_predictors(iris1, iris2)
    np.testing.assert_array_equal(colors, iris1.domain.class_var.colors)

    # case 2: two domains one subset other - different color order
    colors = idom.class_var.colors[::-1]
    dom1 = domain_with_class_values(idom.class_var.values)
    dom2 = domain_with_class_values(idom.class_var.values[:2])
    dom1.class_var.colors = colors
    dom2.class_var.colors = colors[:2]
    iris1 = self.iris[:100].transform(dom1)
    iris2 = self.iris[:100].transform(dom2)
    colors = colors_for_predictors(iris1, iris2)
    np.testing.assert_array_equal(colors, iris1.domain.class_var.colors)

    # case 3: domain color, values mismatch - use default colors
    dom1 = domain_with_class_values(idom.class_var.values)
    dom2 = domain_with_class_values(idom.class_var.values)
    dom1.class_var.colors = dom1.class_var.colors[::-1]
    iris1 = self.iris.transform(dom1)
    iris2 = self.iris.transform(dom2)
    colors = colors_for_predictors(iris1, iris2)
    np.testing.assert_array_equal(colors, ColorPaletteGenerator.palette(3))

    # case 4: two domains different values order, matching colors
    # this way we know that default colors are not used
    colors = ColorPaletteGenerator.palette(5)[2:]
    dom1 = domain_with_class_values(idom.class_var.values)
    dom2 = domain_with_class_values(idom.class_var.values[::-1])
    dom1.class_var.colors = colors
    dom2.class_var.colors = colors[::-1]  # colors mixed same than values
    iris1 = self.iris[:100].transform(dom1)
    iris2 = self.iris[:100].transform(dom2)
    colors = colors_for_predictors(iris1, iris2)
    np.testing.assert_array_equal(colors, iris1.domain.class_var.colors)
def tool_tip(value): value, dist = value if dist is not None: return "{!s} {!s}".format(value, dist) else: return str(value) if __name__ == "__main__": # pragma: no cover filename = "iris.tab" iris = Orange.data.Table(filename) idom = iris.domain dom = Domain( idom.attributes, DiscreteVariable(idom.class_var.name, idom.class_var.values[1::-1])) iris2 = iris[:100].transform(dom) def pred_error(data, *args, **kwargs): raise ValueError pred_error.domain = iris.domain pred_error.name = "To err is human" if iris.domain.has_discrete_class: predictors_ = [ Orange.classification.SVMLearner(probability=True)(iris2), Orange.classification.LogisticRegressionLearner()(iris), pred_error ] elif iris.domain.has_continuous_class: predictors_ = [
class TestInstance(unittest.TestCase):
    """Tests for constructing, indexing, printing and comparing Instance."""

    # names used for mocked domains and for domains of continuous variables
    attributes = ["Feature %i" % i for i in range(10)]
    class_vars = ["Class %i" % i for i in range(1)]
    # one discrete, one continuous and one string meta attribute
    metas = [
        DiscreteVariable("Meta 1", values="XYZ"),
        ContinuousVariable("Meta 2"),
        StringVariable("Meta 3")
    ]

    def mock_domain(self, with_classes=False, with_metas=False):
        """Return a MagicMock of Domain with the class-level variables."""
        attributes = self.attributes
        class_vars = self.class_vars if with_classes else []
        metas = self.metas if with_metas else []
        variables = attributes + class_vars
        return MagicMock(Domain,
                         attributes=attributes,
                         class_vars=class_vars,
                         metas=metas,
                         variables=variables)

    def create_domain(self, attributes=(), classes=(), metas=()):
        """
        Return a real Domain.

        Strings are turned into variables (continuous for attributes and
        classes, discrete with values "0".."4" for metas); anything else is
        used as given.
        """
        attr_vars = [
            ContinuousVariable(name=a) if isinstance(a, str) else a
            for a in attributes
        ]
        class_vars = [
            ContinuousVariable(name=c) if isinstance(c, str) else c
            for c in classes
        ]
        meta_vars = [
            DiscreteVariable(name=m, values=map(str, range(5)))
            if isinstance(m, str) else m
            for m in metas
        ]
        domain = Domain(attr_vars, class_vars, meta_vars)
        return domain

    def test_init_x_no_data(self):
        """Without data, attributes are NaN and y and metas are empty."""
        domain = self.mock_domain()
        inst = Instance(domain)
        self.assertIsInstance(inst, Instance)
        self.assertIs(inst.domain, domain)
        self.assertEqual(inst._x.shape, (len(self.attributes), ))
        self.assertEqual(inst._y.shape, (0, ))
        self.assertEqual(inst._metas.shape, (0, ))
        self.assertTrue(all(isnan(x) for x in inst._x))

    def test_init_xy_no_data(self):
        """Without data, attributes and classes are NaN; metas are empty."""
        domain = self.mock_domain(with_classes=True)
        inst = Instance(domain)
        self.assertIsInstance(inst, Instance)
        self.assertIs(inst.domain, domain)
        self.assertEqual(inst._x.shape, (len(self.attributes), ))
        self.assertEqual(inst._y.shape, (len(self.class_vars), ))
        self.assertEqual(inst._metas.shape, (0, ))
        self.assertTrue(all(isnan(x) for x in inst._x))
        self.assertTrue(all(isnan(x) for x in inst._y))

    def test_init_xym_no_data(self):
        """Without data, all parts, including the metas, are unknown."""
        domain = self.mock_domain(with_classes=True, with_metas=True)
        inst = Instance(domain)
        self.assertIsInstance(inst, Instance)
        self.assertIs(inst.domain, domain)
        self.assertEqual(inst._x.shape, (len(self.attributes), ))
        self.assertEqual(inst._y.shape, (len(self.class_vars), ))
        self.assertEqual(inst._metas.shape, (3, ))
        self.assertTrue(all(isnan(x) for x in inst._x))
        self.assertTrue(all(isnan(x) for x in inst._y))
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", FutureWarning)
            assert_array_equal(
                inst._metas,
                np.array([Unknown, Unknown, Unknown], dtype=object))

    def test_init_x_arr(self):
        """Attributes can be given as a numpy array."""
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")])
        vals = np.array([42, 0])
        inst = Instance(domain, vals)
        assert_array_equal(inst._x, vals)
        self.assertEqual(inst._y.shape, (0, ))
        self.assertEqual(inst._metas.shape, (0, ))

        domain = self.create_domain()
        inst = Instance(domain, np.empty((0, )))
        self.assertEqual(inst._x.shape, (0, ))
        self.assertEqual(inst._y.shape, (0, ))
        self.assertEqual(inst._metas.shape, (0, ))

    def test_init_x_list(self):
        """Attributes can be given as a plain list."""
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")])
        lst = [42, 0]
        vals = np.array(lst)
        # pass the list itself: the array input is covered by test_init_x_arr
        inst = Instance(domain, lst)
        assert_array_equal(inst._x, vals)
        self.assertEqual(inst._y.shape, (0, ))
        self.assertEqual(inst._metas.shape, (0, ))

        domain = self.create_domain()
        inst = Instance(domain, [])
        self.assertEqual(inst._x.shape, (0, ))
        self.assertEqual(inst._y.shape, (0, ))
        self.assertEqual(inst._metas.shape, (0, ))

    def test_init_xy_arr(self):
        """Attributes and class can be given as a numpy array."""
        domain = self.create_domain(
            ["x", DiscreteVariable("g", values="MF")],
            [DiscreteVariable("y", values="ABC")])
        vals = np.array([42, 0, 1])
        inst = Instance(domain, vals)
        assert_array_equal(inst._x, vals[:2])
        self.assertEqual(inst._y.shape, (1, ))
        self.assertEqual(inst._y[0], 1)
        self.assertEqual(inst._metas.shape, (0, ))

    def test_init_xy_list(self):
        """Attributes and class can be given as a list with symbolic values."""
        domain = self.create_domain(
            ["x", DiscreteVariable("g", values="MF")],
            [DiscreteVariable("y", values="ABC")])
        lst = [42, "M", "C"]
        # expected numeric encoding of `lst`
        vals = np.array([42, 0, 2])
        # pass the list itself: the array input is covered by test_init_xy_arr
        inst = Instance(domain, lst)
        assert_array_equal(inst._x, vals[:2])
        self.assertEqual(inst._y.shape, (1, ))
        self.assertEqual(inst._y[0], 2)
        self.assertEqual(inst._metas.shape, (0, ))

    def test_init_xym_arr(self):
        """Attributes, class and metas can be given as an object array."""
        domain = self.create_domain(
            ["x", DiscreteVariable("g", values="MF")],
            [DiscreteVariable("y", values="ABC")],
            self.metas)
        vals = np.array([42, "M", "B", "X", 43, "Foo"], dtype=object)
        inst = Instance(domain, vals)
        self.assertIsInstance(inst, Instance)
        self.assertIs(inst.domain, domain)
        self.assertEqual(inst._x.shape, (2, ))
        self.assertEqual(inst._y.shape, (1, ))
        self.assertEqual(inst._metas.shape, (3, ))
        assert_array_equal(inst._x, np.array([42, 0]))
        self.assertEqual(inst._y[0], 1)
        assert_array_equal(inst._metas,
                           np.array([0, 43, "Foo"], dtype=object))

    def test_init_xym_list(self):
        """Attributes, class and metas can be given as a list."""
        domain = self.create_domain(
            ["x", DiscreteVariable("g", values="MF")],
            [DiscreteVariable("y", values="ABC")],
            self.metas)
        vals = [42, "M", "B", "X", 43, "Foo"]
        inst = Instance(domain, vals)
        self.assertIsInstance(inst, Instance)
        self.assertIs(inst.domain, domain)
        self.assertEqual(inst._x.shape, (2, ))
        self.assertEqual(inst._y.shape, (1, ))
        self.assertEqual(inst._metas.shape, (3, ))
        assert_array_equal(inst._x, np.array([42, 0]))
        self.assertEqual(inst._y[0], 1)
        assert_array_equal(inst._metas,
                           np.array([0, 43, "Foo"], dtype=object))

    def test_init_inst(self):
        """An instance can be built from another one, also across domains."""
        domain = self.create_domain(
            ["x", DiscreteVariable("g", values="MF")],
            [DiscreteVariable("y", values="ABC")],
            self.metas)
        vals = [42, "M", "B", "X", 43, "Foo"]
        inst = Instance(domain, vals)

        inst2 = Instance(domain, inst)
        assert_array_equal(inst2._x, np.array([42, 0]))
        self.assertEqual(inst2._y[0], 1)
        assert_array_equal(inst2._metas,
                           np.array([0, 43, "Foo"], dtype=object))

        # a domain that shuffles variables between parts and adds new ones
        # ("z", "w"), whose values must come out as unknown
        domain2 = self.create_domain(["z", domain[1], self.metas[1]],
                                     domain.class_vars,
                                     [self.metas[0], "w", domain[0]])
        inst2 = Instance(domain2, inst)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", FutureWarning)
            assert_array_equal(inst2._x, np.array([Unknown, 0, 43]))
            self.assertEqual(inst2._y[0], 1)
            assert_array_equal(inst2._metas,
                               np.array([0, Unknown, 42], dtype=object))

    def test_get_item(self):
        """Values are accessible by index, by name and by variable."""
        domain = self.create_domain(
            ["x", DiscreteVariable("g", values="MF")],
            [DiscreteVariable("y", values="ABC")],
            self.metas)
        vals = [42, "M", "B", "X", 43, "Foo"]
        inst = Instance(domain, vals)

        val = inst[0]
        self.assertIsInstance(val, Value)
        self.assertEqual(inst[0], 42)
        self.assertEqual(inst["x"], 42)
        self.assertEqual(inst[domain[0]], 42)

        val = inst[1]
        self.assertIsInstance(val, Value)
        self.assertEqual(inst[1], "M")
        self.assertEqual(inst["g"], "M")
        self.assertEqual(inst[domain[1]], "M")

        val = inst[2]
        self.assertIsInstance(val, Value)
        self.assertEqual(inst[2], "B")
        self.assertEqual(inst["y"], "B")
        self.assertEqual(inst[domain.class_var], "B")

        val = inst[-2]
        self.assertIsInstance(val, Value)
        self.assertEqual(inst[-2], 43)
        self.assertEqual(inst["Meta 2"], 43)
        self.assertEqual(inst[self.metas[1]], 43)

        # NOTE(review): these two checks assign rather than read; presumably
        # reading an unknown key was meant — confirm before changing
        with self.assertRaises(ValueError):
            inst["asdf"] = 42
        with self.assertRaises(ValueError):
            inst[ContinuousVariable("asdf")] = 42

    def test_list(self):
        """`list` returns all values, including the metas."""
        domain = self.create_domain(
            ["x", DiscreteVariable("g", values="MF")],
            [DiscreteVariable("y", values="ABC")],
            self.metas)
        vals = [42, "M", "B", "X", 43, "Foo"]
        inst = Instance(domain, vals)

        values = inst.list
        self.assertIsInstance(values, list)
        self.assertEqual(values, [42, "M", "B", "X", 43, "Foo"])
        self.assertGreater(len(values), len(inst))
        self.assertEqual(len(values), 6)

    def test_set_item(self):
        """Values can be set by index, by name and by variable."""
        domain = self.create_domain(
            ["x", DiscreteVariable("g", values="MF")],
            [DiscreteVariable("y", values="ABC")],
            self.metas)
        vals = [42, "M", "B", "X", 43, "Foo"]
        inst = Instance(domain, vals)

        inst[0] = 43
        self.assertEqual(inst[0], 43)
        inst["x"] = 44
        self.assertEqual(inst[0], 44)
        inst[domain[0]] = 45
        self.assertEqual(inst[0], 45)

        inst[1] = "F"
        self.assertEqual(inst[1], "F")
        inst["g"] = "M"
        self.assertEqual(inst[1], "M")
        with self.assertRaises(ValueError):
            inst[1] = "N"
        with self.assertRaises(ValueError):
            inst["asdf"] = 42

        inst[2] = "C"
        self.assertEqual(inst[2], "C")
        inst["y"] = "A"
        self.assertEqual(inst[2], "A")
        inst[domain.class_var] = "B"
        self.assertEqual(inst[2], "B")

        inst[-1] = "Y"
        self.assertEqual(inst[-1], "Y")
        inst["Meta 1"] = "Z"
        self.assertEqual(inst[-1], "Z")
        inst[domain.metas[0]] = "X"
        self.assertEqual(inst[-1], "X")

    def test_str(self):
        """str() shows attributes, then ' | ' classes, then metas in {}."""
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")])
        inst = Instance(domain, [42, 0])
        self.assertEqual(str(inst), "[42.000, M]")

        domain = self.create_domain(
            ["x", DiscreteVariable("g", values="MF")],
            [DiscreteVariable("y", values="ABC")])
        inst = Instance(domain, [42, "M", "B"])
        self.assertEqual(str(inst), "[42.000, M | B]")

        domain = self.create_domain(
            ["x", DiscreteVariable("g", values="MF")],
            [DiscreteVariable("y", values="ABC")],
            self.metas)
        inst = Instance(domain, [42, "M", "B", "X", 43, "Foo"])
        self.assertEqual(str(inst), "[42.000, M | B] {X, 43.000, Foo}")

        domain = self.create_domain([],
                                    [DiscreteVariable("y", values="ABC")],
                                    self.metas)
        inst = Instance(domain, ["B", "X", 43, "Foo"])
        self.assertEqual(str(inst), "[ | B] {X, 43.000, Foo}")

        domain = self.create_domain([], [], self.metas)
        inst = Instance(domain, ["X", 43, "Foo"])
        self.assertEqual(str(inst), "[] {X, 43.000, Foo}")

        domain = self.create_domain(self.attributes)
        inst = Instance(domain, range(len(self.attributes)))
        self.assertEqual(
            str(inst),
            "[{}]".format(", ".join("{:.3f}".format(x)
                                    for x in range(len(self.attributes)))))

        # the number of decimals of the variables controls the formatting
        for attr in domain:
            attr.number_of_decimals = 0
        self.assertEqual(
            str(inst),
            "[{}]".format(", ".join("{}".format(x)
                                    for x in range(len(self.attributes)))))

    def test_repr(self):
        """repr() shows the first five values followed by an ellipsis."""
        domain = self.create_domain(self.attributes)
        inst = Instance(domain, range(len(self.attributes)))
        self.assertEqual(repr(inst), "[0.000, 1.000, 2.000, 3.000, 4.000, ...]")

        for attr in domain:
            attr.number_of_decimals = 0
        self.assertEqual(repr(inst), "[0, 1, 2, 3, 4, ...]")

    def test_eq(self):
        """Instances are equal only if all their values are equal."""
        domain = self.create_domain(
            ["x", DiscreteVariable("g", values="MF")],
            [DiscreteVariable("y", values="ABC")],
            self.metas)
        vals = [42, "M", "B", "X", 43, "Foo"]
        inst = Instance(domain, vals)
        inst2 = Instance(domain, vals)
        self.assertTrue(inst == inst2)
        self.assertTrue(inst2 == inst)

        # a differing or unknown attribute
        inst2[0] = 43
        self.assertFalse(inst == inst2)

        inst2[0] = Unknown
        self.assertFalse(inst == inst2)

        # a differing class value
        inst2 = Instance(domain, vals)
        inst2[2] = "C"
        self.assertFalse(inst == inst2)

        # a differing value in each of the three metas
        inst2 = Instance(domain, vals)
        inst2[-1] = "Y"
        self.assertFalse(inst == inst2)

        inst2 = Instance(domain, vals)
        inst2[-2] = "33"
        self.assertFalse(inst == inst2)

        inst2 = Instance(domain, vals)
        inst2[-3] = "Bar"
        self.assertFalse(inst == inst2)

    def test_instance_id(self):
        """An explicit id is kept; otherwise instances get distinct ids."""
        domain = self.create_domain(["x"])
        vals = [42]

        inst = Instance(domain, vals, id=42)
        self.assertEqual(inst.id, 42)

        inst2 = Instance(domain, vals)
        inst3 = Instance(domain, vals)

        self.assertNotEqual(inst2.id, inst3.id)