Exemplo n.º 1
0
 def setUp(self):
     """Create three discrete variables, their descriptors and the model."""
     variables = [
         DiscreteVariable(name, list(letters))
         for name, letters in (("x", "abc"), ("y", "def"), ("z", "ghijk"))
     ]
     self.descs = [owcolor.DiscAttrDesc(variable) for variable in variables]
     self.model = owcolor.DiscColorTableModel()
Exemplo n.º 2
0
    def varcls_modified(self, name):
        """Return the parent's modified variable with extra attributes set.

        On top of what the parent implementation does, the variable gets
        five decimals and both the date and the time flag switched on.
        """
        variable = super().varcls_modified(name)
        overrides = (
            ("number_of_decimals", 5),
            ("have_date", 1),
            ("have_time", 1),
        )
        for attribute, value in overrides:
            setattr(variable, attribute, value)
        return variable


# Test cases produced by ``create_pickling_tests``.  Each call receives a
# name followed by ("label", factory) pairs, where every factory is a
# zero-argument callable that builds one variable instance.
# NOTE(review): presumably the name becomes the generated test class name and
# each pair yields one pickling round-trip test; confirm against
# ``create_pickling_tests``, which is not visible here.
PickleContinuousVariable = create_pickling_tests(
    "PickleContinuousVariable",
    ("with_name", lambda: ContinuousVariable(name="Feature 0")),
)

PickleDiscreteVariable = create_pickling_tests(
    "PickleDiscreteVariable",
    ("with_name", lambda: DiscreteVariable(name="Feature 0")),
    ("with_str_value",
     lambda: DiscreteVariable(name="Feature 0", values=("F", "M"))))

PickleStringVariable = create_pickling_tests(
    "PickleStringVariable",
    ("with_name", lambda: StringVariable(name="Feature 0")))


class VariableTestMakeProxy(unittest.TestCase):
    """Checks that variable proxies compare equal to their source."""

    def test_make_proxy_disc(self):
        """A proxy, and a proxy of that proxy, equal the original variable."""
        original = DiscreteVariable("abc", values="abc")
        proxy = original.make_proxy()
        proxy_of_proxy = proxy.make_proxy()
        for candidate in (proxy, proxy_of_proxy):
            self.assertEqual(original, candidate)
Exemplo n.º 3
0
 def test_no_duplicated_values(self):
     """Adding a value that already exists must not create a duplicate."""
     expected = ["a", "b", "c"]
     variable = DiscreteVariable("foo", values=["a", "b", "c"])
     variable.add_value("b")
     for container in (variable.values, variable._value_index):
         self.assertEqual(list(container), expected)
Exemplo n.º 4
0
def vars_from_df(df, role=None, force_nominal=False):
    """Derive variables and data arrays from the columns of a data frame.

    Every column of ``df`` is mapped to a variable by the first rule that
    applies:

    * a variable stored under the column in ``df.orange_variables`` (copied
      with ``compute_value=None``),
    * ``TimeVariable`` when ``_is_datetime`` accepts the column,
    * ``DiscreteVariable`` when ``_is_discrete`` accepts the column,
    * ``ContinuousVariable`` for numeric dtypes (zero decimals for integers),
    * ``StringVariable`` otherwise; such columns always go to metas.

    Parameters
    ----------
    df : data frame
        Source of the columns; it is passed through ``_reset_index`` first.
    role : Role, optional
        Role assigned to the columns. When ``None``, ``df.orange_role`` is
        used if the frame has it, otherwise ``Role.Attribute``.
    force_nominal : bool
        Forwarded to ``_is_discrete``.

    Returns
    -------
    xym : list
        Three entries, one per role slot, each an array, a ``csr_matrix`` or
        ``None``.
    domain : Domain
        Built as ``Domain(*vars_)`` from the three per-role variable lists.

    Raises
    ------
    ValueError
        If a column falls through to ``StringVariable`` while ``role`` is
        given and is not ``Role.Meta``.
    """
    if role is None and hasattr(df, 'orange_role'):
        role = df.orange_role
    df = _reset_index(df)

    # Three parallel slots indexed by role value.
    # NOTE(review): assumes Role members are usable as indices 0..2 in the
    # order (attributes, class, metas) expected by Domain -- confirm.
    cols = [], [], []
    exprs = [], [], []
    vars_ = [], [], []

    for column in df.columns:
        s = df[column]
        _role = Role.Attribute if role is None else role
        if hasattr(df, 'orange_variables') and column in df.orange_variables:
            original_var = df.orange_variables[column]
            var = original_var.copy(compute_value=None)
            expr = None
        elif _is_datetime(s):
            var = TimeVariable(str(column))
            expr = _convert_datetime
        elif _is_discrete(s, force_nominal):
            discrete = s.astype("category").cat
            var = DiscreteVariable(str(column),
                                   discrete.categories.astype(str).tolist())
            expr = to_categorical
        elif is_numeric_dtype(s):
            var = ContinuousVariable(
                # set number of decimals to 0 if int else keeps default behaviour
                str(column),
                number_of_decimals=(0 if is_integer_dtype(s) else None))
            expr = None
        else:
            # Anything else is treated as text, which is only allowed in metas.
            if role is not None and role != Role.Meta:
                raise ValueError("String variable must be in metas.")
            _role = Role.Meta
            var = StringVariable(str(column))
            expr = lambda s, _: np.asarray(s, dtype=object)

        cols[_role].append(column)
        exprs[_role].append(expr)
        vars_[_role].append(var)

    xym = []
    for a_vars, a_cols, a_expr in zip(vars_, cols, exprs):
        if not a_cols:
            # Empty slot: None, unless it compares equal to the attribute
            # slot, in which case an array with zero columns is produced.
            # NOTE(review): this is an equality test between lists, so when
            # the attribute slot is empty too, every empty slot gets the
            # zero-column array -- confirm this is intended.
            arr = None if a_cols != cols[0] else np.empty((df.shape[0], 0))
        elif not any(a_expr):
            # No column needs converting. When the slot holds every column of
            # df, df itself is used so the array can share memory with it.
            a_df = df if all(c in a_cols for c in df.columns) else df[a_cols]
            if all(isinstance(a, SparseDtype) for a in a_df.dtypes):
                arr = csr_matrix(a_df.sparse.to_coo())
            else:
                arr = np.asarray(a_df)
        else:
            # we'll have to copy the table to resolve any expressions
            arr = np.array([
                expr(df[col], var) if expr else np.asarray(df[col])
                for var, col, expr in zip(a_vars, a_cols, a_expr)
            ]).T
        xym.append(arr)

    # A second slot with exactly one column is returned as a 1-D vector
    # instead of an (n, 1) array.
    if xym[1] is not None and xym[1].ndim == 2 and xym[1].shape[1] == 1:
        xym[1] = xym[1][:, 0]

    return xym, Domain(*vars_)
Exemplo n.º 5
0
class TestSqlTable(PostgresTest):
    """Integration tests for ``SqlTable`` run against a PostgreSQL backend.

    The fixtures used here (``self.conn``, ``self.iris``, ``self.backend``,
    ``create_sql_table``, ``drop_sql_table``, ``sql_table_from_data`` and the
    ``*_variable`` data generators) are not defined in this class and are
    expected to be provided by ``PostgresTest``.
    """

    def test_constructs_correct_attributes(self):
        """Float, discrete and string columns map to the right variables."""
        data = list(
            zip(self.float_variable(21), self.discrete_variable(21),
                self.string_variable(21)))
        with self.sql_table_from_data(data) as table:
            self.assertEqual(len(table.domain), 2)
            self.assertEqual(len(table.domain.metas), 1)

            float_attr, discrete_attr = table.domain.variables
            string_attr, = table.domain.metas

            self.assertIsInstance(float_attr, ContinuousVariable)
            self.assertEqual(float_attr.name, "col0")
            self.assertIn('"col0"', float_attr.to_sql())

            self.assertIsInstance(discrete_attr, DiscreteVariable)
            self.assertEqual(discrete_attr.name, "col1")
            self.assertIn('"col1"', discrete_attr.to_sql())
            self.assertEqual(discrete_attr.values, ['f', 'm'])

            self.assertIsInstance(string_attr, StringVariable)
            self.assertEqual(string_attr.name, "col2")
            self.assertIn('"col2"', string_attr.to_sql())

    def test_make_attributes(self):
        """Two tables over the same source get equal first variables."""
        table1 = SqlTable(self.conn, self.iris)
        table2 = SqlTable(self.conn, self.iris)
        self.assertEqual(table1.domain[0], table2.domain[0])

    def test_len(self):
        with self.sql_table_from_data(zip(self.float_variable(26))) as table:
            self.assertEqual(len(table), 26)

        with self.sql_table_from_data(zip(self.float_variable(0))) as table:
            self.assertEqual(len(table), 0)

    def test_bool(self):
        """An empty table is falsy, a table with a row is truthy."""
        with self.sql_table_from_data(()) as table:
            self.assertFalse(table)
        with self.sql_table_from_data(zip(self.float_variable(1))) as table:
            self.assertTrue(table)

    def test_len_with_filter(self):
        data = zip(self.discrete_variable(26))
        with self.sql_table_from_data(data) as table:
            self.assertEqual(len(table), 26)

            filtered_table = filter.SameValue(table.domain[0], 'm')(table)
            self.assertEqual(len(filtered_table), 13)

            # Filtering on a value that no row holds must give an empty table.
            table.domain[0].values.append('x')
            filtered_table = filter.SameValue(table.domain[0], 'x')(table)
            self.assertEqual(len(filtered_table), 0)

    def test_XY_small(self):
        mat = np.random.randint(0, 2, (20, 3))
        conn, table_name = self.create_sql_table(mat)
        sql_table = SqlTable(conn,
                             table_name,
                             type_hints=Domain([],
                                               DiscreteVariable(
                                                   name='col2',
                                                   values=['0', '1', '2'])))
        assert_almost_equal(sql_table.X, mat[:, :2])
        assert_almost_equal(sql_table.Y.flatten(), mat[:, 2])

    @unittest.mock.patch("Orange.data.sql.table.AUTO_DL_LIMIT", 100)
    def test_XY_large(self):
        """Tables above the patched download limit need an explicit download."""
        from Orange.data.sql.table import AUTO_DL_LIMIT as DLL
        mat = np.random.randint(0, 2, (DLL + 100, 3))
        conn, table_name = self.create_sql_table(mat)
        sql_table = SqlTable(conn,
                             table_name,
                             type_hints=Domain([],
                                               DiscreteVariable(
                                                   name='col2',
                                                   values=['0', '1', '2'])))
        with self.assertRaises(ValueError):
            _ = sql_table.X
        with self.assertRaises(ValueError):
            _ = sql_table.Y
        with self.assertRaises(ValueError):
            sql_table.download_data(DLL + 10)
        # Download partial data
        sql_table.download_data(DLL + 10, partial=True)
        assert_almost_equal(sql_table.X, mat[:DLL + 10, :2])
        assert_almost_equal(sql_table.Y.flatten()[:DLL + 10], mat[:DLL + 10,
                                                                  2])
        # Download all data
        sql_table.download_data()
        assert_almost_equal(sql_table.X, mat[:, :2])
        assert_almost_equal(sql_table.Y.flatten(), mat[:, 2])

    def test_download_data(self):
        mat = np.random.randint(0, 2, (20, 3))
        conn, table_name = self.create_sql_table(mat)
        # A fresh table is created for every member, so each access is the
        # first one on that table.
        for member in ('X', 'Y', 'metas', 'W', 'ids'):
            sql_table = SqlTable(conn,
                                 table_name,
                                 type_hints=Domain(
                                     [],
                                     DiscreteVariable(name='col2',
                                                      values=['0', '1', '2'])))
            self.assertIsNotNone(getattr(sql_table, member))
        # has all necessary class members to create a standard Table
        Table(sql_table.domain, sql_table)

    def test_query_all(self):
        table = SqlTable(self.conn, self.iris, inspect_values=True)
        results = list(table)

        self.assertEqual(len(results), 150)

    def test_unavailable_row(self):
        table = SqlTable(self.conn, self.iris)
        with self.assertRaises(IndexError):
            _ = table[151]

    def test_query_subset_of_attributes(self):
        table = SqlTable(self.conn, self.iris)
        attributes = [
            self._mock_attribute("sepal length"),
            self._mock_attribute("sepal width"),
            self._mock_attribute("double width", '2 * "sepal width"')
        ]
        results = list(table._query(attributes))

        self.assertSequenceEqual(results[:5],
                                 [(5.1, 3.5, 7.0), (4.9, 3.0, 6.0),
                                  (4.7, 3.2, 6.4), (4.6, 3.1, 6.2),
                                  (5.0, 3.6, 7.2)])

    def test_query_subset_of_rows(self):
        table = SqlTable(self.conn, self.iris)
        all_results = list(table._query())

        results = list(table._query(rows=range(10)))
        self.assertEqual(len(results), 10)
        self.assertSequenceEqual(results, all_results[:10])

        # NOTE(review): this block repeats the previous one verbatim; it may
        # have been meant to exercise a different kind of row selector.
        results = list(table._query(rows=range(10)))
        self.assertEqual(len(results), 10)
        self.assertSequenceEqual(results, all_results[:10])

        results = list(table._query(rows=slice(None, 10)))
        self.assertEqual(len(results), 10)
        self.assertSequenceEqual(results, all_results[:10])

        results = list(table._query(rows=slice(10, None)))
        self.assertEqual(len(results), 140)
        self.assertSequenceEqual(results, all_results[10:])

    def test_getitem_single_value(self):
        table = SqlTable(self.conn, self.iris, inspect_values=True)
        self.assertAlmostEqual(table[0, 0], 5.1)

    def test_type_hints(self):
        """A meta type hint moves the hinted column out of the variables."""
        table = SqlTable(self.conn, self.iris, inspect_values=True)
        self.assertEqual(len(table.domain), 5)
        self.assertEqual(len(table.domain.metas), 0)
        table = SqlTable(self.conn,
                         self.iris,
                         inspect_values=True,
                         type_hints=Domain([], [],
                                           metas=[StringVariable("iris")]))
        self.assertEqual(len(table.domain), 4)
        self.assertEqual(len(table.domain.metas), 1)

    def test_joins(self):
        table = SqlTable(self.conn,
                         """SELECT a."sepal length",
                          b. "petal length",
                          CASE WHEN b."petal length" < 3 THEN '<'
                               ELSE '>'
                           END AS "qualitative petal length"
                     FROM iris a
               INNER JOIN iris b ON a."sepal width" = b."sepal width"
                    WHERE a."petal width" < 1
                 ORDER BY a."sepal length", b. "petal length" ASC""",
                         type_hints=Domain([
                             DiscreteVariable(name="qualitative petal length",
                                              values=['<', '>'])
                         ], []))

        self.assertEqual(len(table), 498)
        # assertAlmostEqual cannot subtract two lists, so compare the row
        # element by element.
        row = list(table[497])
        expected = [5.8, 1.2, 0.]
        self.assertEqual(len(row), len(expected))
        for actual, wanted in zip(row, expected):
            self.assertAlmostEqual(actual, wanted)

    def _mock_attribute(self, attr_name, formula=None):
        """Return a minimal stand-in for a variable with ``name``/``to_sql``.

        ``formula`` is the SQL expression returned by ``to_sql``; when it is
        omitted, the double-quoted attribute name is used.
        """
        if formula is None:
            formula = '"%s"' % attr_name

        class Attr:
            name = attr_name

            @staticmethod
            def to_sql():
                return formula

        return Attr

    def test_universal_table(self):
        _, table_name = self.construct_universal_table()

        # Drop the helper table even when constructing the SqlTable fails.
        try:
            SqlTable(
                self.conn, """
            SELECT
                v1.col2 as v1,
                v2.col2 as v2,
                v3.col2 as v3,
                v4.col2 as v4,
                v5.col2 as v5
              FROM %(table_name)s v1
        INNER JOIN %(table_name)s v2 ON v2.col0 = v1.col0 AND v2.col1 = 2
        INNER JOIN %(table_name)s v3 ON v3.col0 = v2.col0 AND v3.col1 = 3
        INNER JOIN %(table_name)s v4 ON v4.col0 = v1.col0 AND v4.col1 = 4
        INNER JOIN %(table_name)s v5 ON v5.col0 = v1.col0 AND v5.col1 = 5
             WHERE v1.col1 = 1
          ORDER BY v1.col0
        """ % dict(table_name='"%s"' % table_name))
        finally:
            self.drop_sql_table(table_name)

    def construct_universal_table(self):
        """Create an SQL table of (row, col, row * col) for a 5 x 5 grid."""
        values = []
        for row in range(1, 6):
            for col in range(1, 6):
                values.extend((row, col, row * col))
        table = Table(np.array(values).reshape((-1, 3)))
        return self.create_sql_table(table)

    # Shared type hint for the "iris" column, used by the tests below.
    IRIS_VARIABLE = DiscreteVariable(
        "iris", values=['Iris-setosa', 'Iris-virginica', 'Iris-versicolor'])

    def test_class_var_type_hints(self):
        iris = SqlTable(self.conn,
                        self.iris,
                        type_hints=Domain([], self.IRIS_VARIABLE))

        self.assertEqual(len(iris.domain.class_vars), 1)
        self.assertEqual(iris.domain.class_vars[0].name, 'iris')

    def test_metas_type_hints(self):
        iris = SqlTable(self.conn,
                        self.iris,
                        type_hints=Domain([], [], metas=[self.IRIS_VARIABLE]))

        self.assertEqual(len(iris.domain.metas), 1)
        self.assertEqual(iris.domain.metas[0].name, 'iris')

    def test_select_all(self):
        iris = SqlTable(self.conn,
                        "SELECT * FROM iris",
                        type_hints=Domain([], self.IRIS_VARIABLE))

        self.assertEqual(len(iris.domain), 5)

    # The tests below create a one-column table of a given SQL type and check
    # which variable type is chosen without and with value inspection.

    def test_discrete_bigint(self):
        table = np.arange(6).reshape((-1, 1))
        conn, table_name = self.create_sql_table(table, ['bigint'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, DiscreteVariable)

    def test_continous_bigint(self):
        table = np.arange(25).reshape((-1, 1))
        conn, table_name = self.create_sql_table(table, ['bigint'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

    def test_discrete_int(self):
        table = np.arange(6).reshape((-1, 1))
        conn, table_name = self.create_sql_table(table, ['int'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, DiscreteVariable)

    def test_continous_int(self):
        table = np.arange(25).reshape((-1, 1))
        conn, table_name = self.create_sql_table(table, ['int'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

    def test_discrete_smallint(self):
        table = np.arange(6).reshape((-1, 1))
        conn, table_name = self.create_sql_table(table, ['smallint'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, DiscreteVariable)

    def test_continous_smallint(self):
        table = np.arange(25).reshape((-1, 1))
        conn, table_name = self.create_sql_table(table, ['smallint'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

    def test_boolean(self):
        table = np.array(['F', 'T', 0, 1, 'False', 'True']).reshape(-1, 1)
        conn, table_name = self.create_sql_table(table, ['boolean'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, DiscreteVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, DiscreteVariable)

    def test_discrete_char(self):
        table = np.array(['M', 'F', 'M', 'F', 'M', 'F']).reshape(-1, 1)
        conn, table_name = self.create_sql_table(table, ['char(1)'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstMetaIsInstance(sql_table, StringVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, DiscreteVariable)

    def test_meta_char(self):
        table = np.array(list('ABCDEFGHIJKLMNOPQRSTUVW')).reshape(-1, 1)
        conn, table_name = self.create_sql_table(table, ['char(1)'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstMetaIsInstance(sql_table, StringVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstMetaIsInstance(sql_table, StringVariable)

    def test_discrete_varchar(self):
        table = np.array(['M', 'F', 'M', 'F', 'M', 'F']).reshape(-1, 1)
        conn, table_name = self.create_sql_table(table, ['varchar(1)'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstMetaIsInstance(sql_table, StringVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, DiscreteVariable)

    def test_meta_varchar(self):
        table = np.array(list('ABCDEFGHIJKLMNOPQRSTUVW')).reshape(-1, 1)
        conn, table_name = self.create_sql_table(table, ['varchar(1)'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstMetaIsInstance(sql_table, StringVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstMetaIsInstance(sql_table, StringVariable)

    def test_time_date(self):
        table = np.array([
            '2014-04-12', '2014-04-13', '2014-04-14', '2014-04-15',
            '2014-04-16'
        ]).reshape(-1, 1)
        conn, table_name = self.create_sql_table(table, ['date'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, TimeVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, TimeVariable)

    def test_time_time(self):
        table = np.array([
            '17:39:51', '11:51:48.46', '05:20:21.492149', '21:47:06',
            '04:47:35.8'
        ]).reshape(-1, 1)
        conn, table_name = self.create_sql_table(table, ['time'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, TimeVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, TimeVariable)

    def test_time_timetz(self):
        table = np.array([
            '17:39:51+0200', '11:51:48.46+01', '05:20:21.4921',
            '21:47:06-0600', '04:47:35.8+0330'
        ]).reshape(-1, 1)
        conn, table_name = self.create_sql_table(table, ['timetz'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, TimeVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, TimeVariable)

    def test_time_timestamp(self):
        table = np.array([
            '2014-07-15 17:39:51.348149', '2008-10-05 11:51:48.468149',
            '2008-11-03 05:20:21.492149', '2015-01-02 21:47:06.228149',
            '2016-04-16 04:47:35.892149'
        ]).reshape(-1, 1)
        conn, table_name = self.create_sql_table(table, ['timestamp'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, TimeVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, TimeVariable)

    def test_time_timestamptz(self):
        table = np.array([
            '2014-07-15 17:39:51.348149+0200', '2008-10-05 11:51:48.468149+02',
            '2008-11-03 05:20:21.492149+01', '2015-01-02 21:47:06.228149+0100',
            '2016-04-16 04:47:35.892149+0330'
        ]).reshape(-1, 1)
        conn, table_name = self.create_sql_table(table, ['timestamptz'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, TimeVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, TimeVariable)

    def test_double_precision(self):
        table = np.arange(25).reshape((-1, 1))
        conn, table_name = self.create_sql_table(table, ['double precision'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

    def test_numeric(self):
        table = np.arange(25).reshape((-1, 1))
        conn, table_name = self.create_sql_table(table, ['numeric(15, 2)'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

    def test_real(self):
        table = np.arange(25).reshape((-1, 1))
        conn, table_name = self.create_sql_table(table, ['real'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

    def test_serial(self):
        table = np.arange(25).reshape((-1, 1))
        conn, table_name = self.create_sql_table(table, ['serial'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

    @unittest.skipIf(sql_version < 90200,
                     "Type not supported on this server version.")
    def test_smallserial(self):
        table = np.arange(25).reshape((-1, 1))
        conn, table_name = self.create_sql_table(table, ['smallserial'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

    @unittest.skipIf(sql_version < 90200,
                     "Type not supported on this server version.")
    def test_bigserial(self):
        table = np.arange(25).reshape((-1, 1))
        conn, table_name = self.create_sql_table(table, ['bigserial'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstAttrIsInstance(sql_table, ContinuousVariable)

    def test_text(self):
        table = np.array(list('ABCDEFGHIJKLMNOPQRSTUVW')).reshape((-1, 1))
        conn, table_name = self.create_sql_table(table, ['text'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstMetaIsInstance(sql_table, StringVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstMetaIsInstance(sql_table, StringVariable)

    def test_other(self):
        table = np.array([
            'bcd4d9c0-361e-bad4-7ceb-0d171cdec981',
            '544b7ddc-d861-0201-81c8-9f7ad0bbf531',
            'b35a10f7-7901-f313-ec16-5ad9778040a6',
            'b267c4be-4a26-60b5-e664-737a90a40e93'
        ]).reshape(-1, 1)
        conn, table_name = self.create_sql_table(table, ['uuid'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstMetaIsInstance(sql_table, StringVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstMetaIsInstance(sql_table, StringVariable)

        # A string filter on the meta column must work and match nothing.
        filters = filter.Values(
            [filter.FilterString(-1, filter.FilterString.Equal, 'foo')])
        self.assertEqual(len(filters(sql_table)), 0)

    def test_recovers_connection_after_sql_error(self):
        """A query still succeeds after an earlier one has failed."""
        conn, table_name = self.create_sql_table(
            np.arange(25).reshape((-1, 1)))
        sql_table = SqlTable(conn, table_name)

        # The column contains 0, so dividing by it is presumably what makes
        # this query fail; a BackendError here is expected and ignored.
        try:
            broken_query = "SELECT 1/%s FROM %s" % (
                sql_table.domain.attributes[0].to_sql(), sql_table.table_name)
            with sql_table.backend.execute_sql_query(broken_query) as cur:
                cur.fetchall()
        except BackendError:
            pass

        working_query = "SELECT %s FROM %s" % (
            sql_table.domain.attributes[0].to_sql(), sql_table.table_name)
        with sql_table.backend.execute_sql_query(working_query) as cur:
            cur.fetchall()

    def test_basic_stats(self):
        iris = SqlTable(self.conn, self.iris, inspect_values=True)
        stats = BasicStats(iris, iris.domain['sepal length'])
        self.assertAlmostEqual(stats.min, 4.3)
        self.assertAlmostEqual(stats.max, 7.9)
        self.assertAlmostEqual(stats.mean, 5.8, 1)
        self.assertEqual(stats.nans, 0)
        self.assertEqual(stats.non_nans, 150)

        domain_stats = DomainBasicStats(iris, include_metas=True)
        self.assertEqual(len(domain_stats.stats),
                         len(iris.domain) + len(iris.domain.metas))
        stats = domain_stats['sepal length']
        self.assertAlmostEqual(stats.min, 4.3)
        self.assertAlmostEqual(stats.max, 7.9)
        self.assertAlmostEqual(stats.mean, 5.8, 1)
        self.assertEqual(stats.nans, 0)
        self.assertEqual(stats.non_nans, 150)

    @unittest.mock.patch("Orange.data.sql.table.LARGE_TABLE", 100)
    def test_basic_stats_on_large_data(self):
        # By setting LARGE_TABLE to 100, iris will be treated as
        # a large table and sampling will be used. As the table
        # is actually small, time base sampling should return
        # all rows, so the same assertions can be used.
        self.test_basic_stats()

    def test_distributions(self):
        iris = SqlTable(self.conn, self.iris, inspect_values=True)

        dists = get_distributions(iris)
        self.assertEqual(len(dists), 5)
        dist = dists[0]
        self.assertAlmostEqual(dist.min(), 4.3)
        self.assertAlmostEqual(dist.max(), 7.9)
        self.assertAlmostEqual(dist.mean(), 5.8, 1)

    def test_contingencies(self):
        iris = SqlTable(self.conn, self.iris, inspect_values=True)
        # First two variables plus a discretized "sepal width".
        iris.domain = Domain(
            iris.domain[:2] +
            (EqualWidth()(iris, iris.domain['sepal width']), ),
            iris.domain['iris'])

        conts = get_contingencies(iris)
        self.assertEqual(len(conts), 3)
        self.assertIsInstance(conts[0], Continuous)
        self.assertIsInstance(conts[1], Continuous)
        self.assertIsInstance(conts[2], Discrete)

    def test_pickling_restores_connection_pool(self):
        iris = SqlTable(self.conn, self.iris, inspect_values=True)
        iris2 = pickle.loads(pickle.dumps(iris))

        self.assertEqual(iris[0], iris2[0])

    def test_list_tables_with_schema(self):
        """Tables in a non-default schema are listed and can be opened."""
        with self.backend.execute_sql_query(
                "DROP SCHEMA IF EXISTS orange_tests CASCADE") as cur:
            cur.execute("CREATE SCHEMA orange_tests")
            cur.execute("CREATE TABLE orange_tests.efgh (id int)")
            cur.execute("INSERT INTO orange_tests.efgh (id) VALUES (1)")
            cur.execute("INSERT INTO orange_tests.efgh (id) VALUES (2)")

        try:
            tables = self.backend.list_tables("orange_tests")
            self.assertTrue(any(t.name == "efgh" for t in tables))
            SqlTable(self.conn, tables[0], inspect_values=True)
        finally:
            # Remove the schema again whatever the outcome.
            with self.backend.execute_sql_query(
                    "DROP SCHEMA IF EXISTS orange_tests CASCADE"):
                pass

    def assertFirstAttrIsInstance(self, table, variable_type):
        """Assert the domain is non-empty and its first variable has the type."""
        self.assertGreater(len(table.domain), 0)
        attr = table.domain[0]
        self.assertIsInstance(attr, variable_type)

    def assertFirstMetaIsInstance(self, table, variable_type):
        """Assert there is a meta and ``domain[-1]`` has the given type."""
        self.assertGreater(len(table.domain.metas), 0)
        # NOTE(review): relies on negative domain indices addressing metas,
        # with -1 being the first meta -- confirm against Domain.
        attr = table.domain[-1]
        self.assertIsInstance(attr, variable_type)
Exemplo n.º 6
0
    def send_data(self):
        """Send the annotated data and the centroid table for the chosen k.

        ``None`` is sent on both outputs when there is no data or when the
        clustering stored for the chosen k is missing or is a string.
        Otherwise the input data is extended with two metas, the cluster
        label and the score from ``self.samples_scores`` (NaN when the data
        has more than ``SILHOUETTE_MAX_SAMPLES`` rows), and a second table
        with one row per centroid is built and named after the input data.
        """
        if not self.optimize_k:
            k = self.k
        else:
            row = self.selected_row()
            k = None if row is None else self.k_from + row

        km = self.clusterings.get(k)
        if self.data is None or km is None or isinstance(km, str):
            self.Outputs.annotated_data.send(None)
            self.Outputs.centroids.send(None)
            return

        domain = self.data.domain
        n_clusters = km.k
        cluster_var = DiscreteVariable(
            get_unique_names(domain, "Cluster"),
            values=["C%d" % number for number in range(1, n_clusters + 1)])
        labels = km.labels
        silhouette_var = ContinuousVariable(
            get_unique_names(domain, "Silhouette"))

        if len(self.data) > SILHOUETTE_MAX_SAMPLES:
            # Too many rows: skip the scores and fill the columns with NaN.
            self.Warning.no_silhouettes()
            scores = np.nan
            clust_scores = np.full((n_clusters, 1), np.nan)
        else:
            self.Warning.no_silhouettes.clear()
            scores = self.samples_scores(labels)

            def cluster_mean(members):
                # Mean score of a cluster; 0 for a cluster with no members.
                return np.mean(scores[members]) if members.any() else 0.

            clust_scores = np.atleast_2d(
                [cluster_mean(labels == index)
                 for index in range(n_clusters)]).T

        extra_metas = [cluster_var, silhouette_var]
        new_domain = add_columns(domain, metas=extra_metas)
        new_table = self.data.transform(new_domain)
        for variable, column in ((cluster_var, labels),
                                 (silhouette_var, scores)):
            new_table.get_column_view(variable)[0][:] = column

        # Where an attribute was derived by ReplaceUnknowns from one of the
        # input attributes, use that input attribute for the centroids.
        centroid_attributes = []
        for attr in km.domain.attributes:
            compute = attr.compute_value
            if isinstance(compute, ReplaceUnknowns) \
                    and compute.variable in domain.attributes:
                attr = compute.variable
            centroid_attributes.append(attr)

        centroid_domain = add_columns(
            Domain(centroid_attributes, [], domain.metas),
            metas=[cluster_var, silhouette_var])
        # Metas of the centroid table: NaN for the original metas, then the
        # cluster index and the mean score of each cluster.
        centroid_metas = np.hstack((
            np.full((n_clusters, len(domain.metas)), np.nan),
            np.arange(n_clusters).reshape(n_clusters, 1),
            clust_scores))
        centroids = Table(centroid_domain, km.centroids, None, centroid_metas)
        centroids.name = (
            "centroids" if self.data.name == Table.name
            else f"{self.data.name} centroids")

        self.Outputs.annotated_data.send(new_table)
        self.Outputs.centroids.send(centroids)
Exemplo n.º 7
0
 def test_discrete(self):
     """Run the shared checks on a two-valued discrete variable."""
     self._test_common(DiscreteVariable("D", values=("a", "b")))
Exemplo n.º 8
0
                    is_inside = not is_inside
        return is_inside

    clusters = [None] * len(coordinates)
    for cluster, hull in hulls.items():
        for i, c in enumerate(coordinates.X):
            if point_in_polygon_test(c, hull):
                clusters[i] = cluster

    if cluster_attribute is not None:
        assert all(
            i in cluster_attribute.values for i in set(clusters) -
            {None}), "cluster_attribute does not have all required values."
    # create the table
    new_domain = Domain([
        DiscreteVariable("Clusters", values=sorted(list(hulls.keys())))
        if cluster_attribute is None else cluster_attribute
    ])
    return Table(
        new_domain,
        np.array(list(map(new_domain[0].to_val, clusters))).reshape(-1, 1))


if __name__ == "__main__":
    # Demo run: compute concave hulls on columns 2 and 3 of the Iris data,
    # with the 150 rows assigned to three clusters in blocks of 50.
    data = Table("iris")[:, 2:4]
    clustered_data = Table(
        Domain([DiscreteVariable("cl", values=["1", "2", "3"])]),
        [[label] for label in range(3) for _ in range(50)])
    compute_concave_hulls(data, clustered_data, epsilon=0.5)
Exemplo n.º 9
0
def assign_labels(clusters, annotations, labels_per_cluster):
    """
    This function assigns a certain number of labels per cluster. Each cluster
    gets `labels_per_cluster` number of most common labels in cluster assigned.

    Parameters
    ----------
    clusters : Orange.data.Table
        Cluster indices for each item.
    annotations : Orange.data.Table
        Table with annotations and their probabilities.
    labels_per_cluster : int
        Number of labels that need to be assigned to each cluster.

    Returns
    -------
    dict
        Dictionary with cluster index as a key and list of annotations as a
        value. Each list include tuples with the annotation name and their
        proportion in the cluster. The proportion is computed over all items
        of the cluster, including those without any annotation.
    Orange.data.Table
        The array with the annotation assigned to the item.
    """
    clusters_unique = set(clusters.domain[0].values)

    if len(annotations.domain) == 0:
        return {}, Table(Domain([DiscreteVariable("Annotation", values=[])]),
                         np.ones((len(clusters), 1)) * np.nan)

    labels = np.array(list(map(str, annotations.domain.attributes)))

    # remove rows with all nans
    nan_mask = np.isnan(annotations.X).all(axis=1)
    ann_not_nan = annotations.X[~nan_mask]

    # find indices and labels
    annotation_best_idx = np.nanargmax(ann_not_nan, axis=1)
    annotation_best = labels[annotation_best_idx]

    # join back together; np.zeros (unlike np.empty) guarantees that rows
    # without annotation hold "", which is the marker tested for below
    items_annotations = np.zeros(annotations.X.shape[0], dtype=labels.dtype)
    items_annotations[~nan_mask] = annotation_best

    # cluster name of every item; it does not depend on the cluster being
    # processed, so it is computed once instead of once per cluster
    item_clusters = np.array(
        list(map(clusters.domain.attributes[0].repr_val,
                 clusters.X[:, 0]))).flatten()

    annotations_clusters = {}
    for cl in clusters_unique:
        labels_cl = items_annotations[item_clusters == cl]
        # remove items without annotation
        labels_cl_filtered = labels_cl[labels_cl != ""]

        counts = Counter(labels_cl_filtered)
        common_labels = counts.most_common(labels_per_cluster)

        if len(common_labels) > 0:
            annotations_clusters[cl] = [
                (label, count / len(labels_cl))
                for label, count in common_labels
            ]

    # pack item annotations to Table
    nan_mask = items_annotations == ""
    values, indices = np.unique(items_annotations[~nan_mask],
                                return_inverse=True)
    corrected_idx = np.ones(items_annotations.shape) * np.nan
    corrected_idx[~nan_mask] = indices
    domain = Domain([DiscreteVariable("Annotation", values=values)])
    item_annotations = Table(domain, corrected_idx.reshape((-1, 1)))

    return annotations_clusters, item_annotations
Exemplo n.º 10
0
    def test_nonunique(self):
        """
        Check the non-unique key errors and the output for several
        combinations of merge type and matched attributes.
        """
        widget = self.widget

        def verify(left_shown, right_shown, has_output=None):
            # Assert visibility of both errors; when `has_output` is given,
            # also assert whether the widget produced output data.
            assert_left = self.assertTrue if left_shown else self.assertFalse
            assert_right = self.assertTrue if right_shown else self.assertFalse
            assert_left(widget.Error.nonunique_left.is_shown())
            assert_right(widget.Error.nonunique_right.is_shown())
            if has_output is None:
                return
            output = self.get_output(widget.Outputs.data)
            if has_output:
                self.assertIsNotNone(output)
            else:
                self.assertIsNone(output)

        def match_on(*pairs):
            # Set the matched attribute pairs and recompute the output.
            widget.attr_boxes.set_state(list(pairs))
            widget.unconditional_commit()

        def merge_with(merge_type):
            # Switch the merge type and recompute the output.
            widget.merging = merge_type
            widget.unconditional_commit()

        x = ContinuousVariable("x")
        d = DiscreteVariable("d", values=list("abc"))
        domain = Domain([x, d], [])
        dataA = Table.from_numpy(domain, np.array([[1.0, 0], [1, 1], [2, 1]]))
        dataB = Table.from_numpy(domain, np.array([[1.0, 0], [2, 1], [3, 1]]))
        dataB.ids = dataA.ids
        self.send_signal(widget.Inputs.data, dataA)
        self.send_signal(widget.Inputs.extra_data, dataB)
        widget.merging = widget.InnerJoin
        verify(False, False)

        match_on((INSTANCEID, INSTANCEID))
        verify(False, False, has_output=True)

        match_on((INDEX, INDEX))
        verify(False, False, has_output=True)

        match_on((x, x))
        verify(True, False, has_output=False)

        merge_with(widget.LeftJoin)
        verify(False, False, has_output=True)

        widget.merging = widget.InnerJoin
        match_on((x, x), (d, d))
        verify(False, False, has_output=True)

        match_on((d, d))
        verify(True, True, has_output=False)

        merge_with(widget.LeftJoin)
        verify(False, True, has_output=False)

        merge_with(widget.InnerJoin)
        verify(True, True, has_output=False)

        self.send_signal(widget.Inputs.data, None)
        self.send_signal(widget.Inputs.extra_data, None)
        verify(False, False, has_output=False)
Exemplo n.º 11
0
# Pairs a variable descriptor with a column of values for that variable.
VarDataPair = namedtuple('VarDataPair', ['variable', 'data'])

# Continuous variable variations
# Five values without any missing entries.
continuous_full = VarDataPair(
    ContinuousVariable('continuous_full'),
    np.array([0, 1, 2, 3, 4], dtype=float),
)
# Same as above, with one value replaced by NaN.
continuous_missing = VarDataPair(
    ContinuousVariable('continuous_missing'),
    np.array([0, 1, 2, np.nan, 4], dtype=float),
)

# Unordered discrete variable variations
# The data hold indices into the variable's values ('r', 'g', 'b').
rgb_full = VarDataPair(
    DiscreteVariable('rgb_full', values=('r', 'g', 'b')),
    np.array([0, 1, 1, 1, 2], dtype=float),
)
# Same as above, with one entry replaced by NaN.
rgb_missing = VarDataPair(
    DiscreteVariable('rgb_missing', values=('r', 'g', 'b')),
    np.array([0, 1, 1, np.nan, 2], dtype=float),
)

# Ordered discrete variable variations
# The variable is created with ordered=True; values are numeric strings.
ints_full = VarDataPair(
    DiscreteVariable('ints_full', values=('2', '3', '4'), ordered=True),
    np.array([0, 1, 1, 1, 2], dtype=float),
)
ints_missing = VarDataPair(
    DiscreteVariable('ints_missing', values=('2', '3', '4'), ordered=True),
    np.array([0, 1, 1, np.nan, 2], dtype=float),
Exemplo n.º 12
0
    def test_match_attr_name(self):
        """
        Check which entry the right combo ends up on after an entry is
        activated in the left combo of the first row.
        """
        widget = self.widget
        first_row = widget.attr_boxes.rows[0]
        data_combo = first_row.left_combo
        extra_combo = first_row.right_combo

        def activate(extra_index, data_index):
            # Activate an entry in the extra combo, then one in the data
            # combo, and report where the extra combo ended up.
            for combo, index in ((extra_combo, extra_index),
                                 (data_combo, data_index)):
                combo.setCurrentIndex(index)
                combo.activated.emit(index)
            return extra_combo.currentIndex()

        attributes_a = [
            DiscreteVariable("dA1", ("a", "b", "c", "d")),
            DiscreteVariable("dA2", ("aa", "bb")),
            DiscreteVariable("dA3", ("aa", "bb"))
        ]
        class_a = DiscreteVariable("cls", ("aaa", "bbb", "ccc"))
        metas_a = [DiscreteVariable("mA1", ("cc", "dd")),
                   StringVariable("mA2")]
        domainA = Domain(attributes_a, class_a, metas_a)
        XA = np.array([[0, 0, 0], [1, 1, 0], [2, 0, 0], [3, 1, 0]])
        yA = np.array([0, 1, 2, np.nan])
        metasA = np.array([[0.0, "m1"], [1.0, "m2"], [np.nan, "m3"],
                           [0.0, "m4"]]).astype(object)

        attributes_b = [
            DiscreteVariable("dB1", values=("a", "b", "c")),
            ContinuousVariable("dA2")
        ]
        metas_b = [StringVariable("cls"),
                   DiscreteVariable("dA1", ("m4", "m5"))]
        domainB = Domain(attributes_b, None, metas_b)
        XB = np.array([[0, 0], [1, 1], [2, np.nan]])
        yB = np.empty((3, 0))
        metasB = np.array([[np.nan, np.nan], [1, 1], [0, 0]]).astype(object)

        dataA = Table(domainA, XA, yA, metasA)
        dataA.name = 'dataA'
        dataA.attributes = 'dataA attributes'
        dataB = Table(domainB, XB, yB, metasB)
        dataB.name = 'dataB'
        dataB.attributes = 'dataB attributes'

        self.send_signal(widget.Inputs.data, dataA)
        self.send_signal(widget.Inputs.extra_data, dataB)

        # match variable if available and the other combo is Row Index
        self.assertEqual(activate(0, 2), 5)

        # match variable if available and the other combo is ID
        self.assertEqual(activate(1, 2), 5)

        # don't match variable if other combo is set
        self.assertEqual(activate(4, 2), 4)

        # don't match if nothing to match to
        self.assertEqual(activate(0, 4), 0)

        # don't match numeric with non-numeric
        self.assertEqual(activate(0, 3), 0)

        # allow matching string with discrete
        self.assertEqual(activate(0, 5), 4)
Exemplo n.º 13
0
    def __get_pivot_tab_domain(self, val_var, X, X_h, X_v, X_t, agg_funs):
        """
        Build the four domains used for the pivot table.

        Column variables are created per value group of ``self._col_var``;
        their type (time, continuous or discrete) depends on ``val_var`` and
        on the requested aggregation functions. Names that clash are made
        unique and the original names are recorded in ``self.renamed``.

        Note that in the discrete case the arrays ``X``, ``X_h``, ``X_v`` and
        ``X_t`` are modified in place (see ``map_values``), and that
        ``self._row_var`` / ``self._col_var`` may be replaced by renamed
        copies.

        Returns
        -------
        tuple of four Domain
            Domains built from, in order: the row variable, the aggregate
            variable and ``attrs[0]``; the "Total" row variable, the
            aggregate variable and ``attrs[1]``; ``attrs[2]``; ``attrs[3]``.
        """
        def map_values(index, _X):
            # Replace each distinct value in column `index` of `_X` (in
            # place) with its position among the column's unique values;
            # entries equal to the string "nan" are left out of the mapping.
            # Returns the unique values.
            values = np.unique(_X[:, index])
            values = np.delete(values, np.where(values == "nan")[0])
            for j, value in enumerate(values):
                _X[:, index][_X[:, index] == value] = j
            return values

        # Time variables are created only for a time-valued `val_var` when
        # every aggregation is listed in self.TimeVarFunctions.
        create_time_var = \
            isinstance(val_var, TimeVariable) and \
            all(fun in self.TimeVarFunctions for fun in agg_funs)
        # `and` binds tighter than `or`: this is true when there is no
        # `val_var`, or when it is continuous and either not a time variable
        # or all aggregations are listed in self.FloatFunctions.
        create_cont_var = \
            not val_var or val_var.is_continuous and \
            (not isinstance(val_var, TimeVariable) or
             all(fun in self.FloatFunctions for fun in agg_funs))

        # Names of the column variable's values, picked by group index.
        vals = np.array(self._col_var.values)[self._col_var_groups.astype(int)]
        if create_time_var:
            kwargs = {
                "have_date": val_var.have_date,
                "have_time": val_var.have_time
            }
            # NOTE(review): `[...] * 2` repeats a reference, so attrs[0] and
            # attrs[1] are the same list object (likewise attrs[2] and
            # attrs[3]).
            attrs = [[TimeVariable(f"{v}", **kwargs) for v in vals]] * 2
            attrs.extend([[TimeVariable("Total", **kwargs)]] * 2)
        elif create_cont_var:
            # Same list aliasing as in the time-variable branch above.
            attrs = [[ContinuousVariable(f"{v}", 1) for v in vals]] * 2
            attrs.extend([[ContinuousVariable("Total", 1)]] * 2)
        else:
            # Discrete case: each table gets its own variables, whose values
            # are the distinct entries found in the corresponding column.
            attrs = []
            for x in (X, X_h):
                # Columns are read starting at index 2 of each array.
                attrs.append([
                    DiscreteVariable(f"{v}", map_values(i, x))
                    for i, v in enumerate(vals, 2)
                ])
            for x in (X_v, X_t):
                attrs.append([DiscreteVariable("Total", map_values(0, x))])
        row_var_h = DiscreteVariable(self._row_var.name, values=["Total"])
        aggr_attr = DiscreteVariable('Aggregate', [str(f) for f in agg_funs])

        # Remember whether rows and columns use the very same variable, so
        # that a rename of one is mirrored on the other.
        same_row_col = self._col_var is self._row_var

        extra_vars = [self._row_var, aggr_attr]
        uniq_a = get_unique_names_duplicates([v.name for v in extra_vars] +
                                             [atr.name for atr in attrs[0]])
        # Walk the variables next to their de-duplicated names; index 0 is
        # the row variable, index 1 the aggregate variable and the rest are
        # the column variables of attrs[0].
        for (idx, var), u in zip(enumerate(chain(extra_vars, attrs[0])),
                                 uniq_a):
            if var.name == u:
                continue
            if idx == 0:
                self.renamed.append(self._row_var.name)
                self._row_var = self._row_var.copy(name=u)
                if same_row_col:
                    self._col_var = self._row_var
                row_var_h = row_var_h.copy(name=u)
            elif idx == 1:
                self.renamed.append(aggr_attr.name)
                aggr_attr = aggr_attr.copy(name=u)
            else:
                self.renamed.append(var.name)
                # The renamed copy of attrs[0]'s variable is stored in both
                # lists.
                attrs[0][idx - 2] = var.copy(name=u)
                attrs[1][idx - 2] = var.copy(name=u)

        if same_row_col:
            # NOTE(review): the results of both `make` calls are discarded;
            # presumably they are called for a side effect — confirm.
            vals = tuple(v.name for v in attrs[0])
            self._row_var.make(self._row_var.name, values=vals)
            vals = tuple(v.name for v in attrs[2])
            row_var_h.make(row_var_h.name, vals)

        return (Domain([self._row_var, aggr_attr] + attrs[0]),
                Domain([row_var_h, aggr_attr] + attrs[1]), Domain(attrs[2]),
                Domain(attrs[3]))
Exemplo n.º 14
0
 def setUp(self):
     """Create a three-valued discrete variable and its color descriptor."""
     variable = DiscreteVariable("x", ["a", "b", "c"])
     self.var = variable
     self.desc = owcolor.DiscAttrDesc(variable)
Exemplo n.º 15
0
 def setUp(self):
     """Build a domain of discrete variables a, b, c and a four-row table."""
     variables = []
     for name in "abc":
         variables.append(DiscreteVariable(name))
     self.domain = Domain(variables)
     rows = [
         [0, 1, 1],
         [1, 1, 1],
         [1, 0, 1],
         [1, 0, 0],
     ]
     self.data = Table(self.domain, rows)
Exemplo n.º 16
0
def cluster_additional_points(coordinates, hulls, cluster_attribute=None):
    """
    Assign additional points to the existing clusters, according to the
    concave hull in which each point lies.

    Parameters
    ----------
    coordinates : Orange.data.Table
        Visualisation coordinates - embeddings
    hulls : dict
        Concave hull for each cluster
    cluster_attribute : Orange.data.DiscreteVariable (optional)
        A variable for clusters. When given, it is used as the variable of
        the resulting Table instead of a newly created one.

    Returns
    -------
    Orange.data.Table
        Cluster label for each point
    """
    def point_in_polygon_test(test_point, polygon_points):
        """
        Horizontal ray casting: count how many polygon edges are crossed by
        the ray that starts in test_point and goes to infinity. The point is
        inside the polygon when the number of crossings is odd.
        https://stackoverflow.com/a/2922778/3551700
        """
        point_x, point_y = test_point[0], test_point[1]
        # every vertex paired with its successor; the last wraps to the first
        successors = np.concatenate(
            (polygon_points[1:], polygon_points[:1]), axis=0)
        # toggled at each crossing, so it ends True for an odd count
        odd_crossings = False

        for (x1, y1), (x2, y2) in zip(polygon_points, successors):
            # the ray can cross only edges whose ends lie on different
            # sides of the ray's y coordinate
            if (y1 > point_y) == (y2 > point_y):
                continue
            # x coordinate at which the edge meets the horizontal line
            crossing_x = (x2 - x1) * (point_y - y1) / (y2 - y1) + x1
            # count only crossings to the right of the point
            if point_x < crossing_x:
                odd_crossings = not odd_crossings
        return odd_crossings

    clusters = [None] * len(coordinates)
    for name, polygon in hulls.items():
        for row, point in enumerate(coordinates.X):
            if point_in_polygon_test(point, polygon):
                clusters[row] = name

    if cluster_attribute is not None:
        assigned = {name for name in clusters if name is not None}
        assert all(name in cluster_attribute.values for name in assigned), \
            "cluster_attribute does not have all required values."

    # create the table
    if cluster_attribute is None:
        cluster_var = DiscreteVariable("Clusters",
                                       values=sorted(hulls.keys()))
    else:
        cluster_var = cluster_attribute
    new_domain = Domain([cluster_var])
    to_val = new_domain[0].to_val
    column = np.array([to_val(name) for name in clusters])
    return Table(new_domain, column.reshape(-1, 1))
import warnings
from unittest import TestCase
from unittest.mock import Mock
from Orange.data import Domain, DiscreteVariable
from Orange.data import ContinuousVariable
from Orange.util import OrangeDeprecationWarning
from Orange.widgets.settings import DomainContextHandler, ContextSetting
from Orange.widgets.utils import vartype

# Results of vartype for a continuous and for a discrete variable; used in
# setUp below as the expected entries for attributes and metas.
Continuous = vartype(ContinuousVariable())
Discrete = vartype(DiscreteVariable())


class TestDomainContextHandler(TestCase):
    def setUp(self):
        """
        Prepare a mixed domain, the matching (domain, attributes, metas)
        argument tuple and a handler whose read_defaults does nothing.
        """
        attributes = [ContinuousVariable('c1'),
                      DiscreteVariable('d1', values='abc'),
                      DiscreteVariable('d2', values='def')]
        class_vars = [DiscreteVariable('d3', values='ghi')]
        metas = [ContinuousVariable('c2'),
                 DiscreteVariable('d4', values='jkl')]
        self.domain = Domain(attributes=attributes,
                             class_vars=class_vars,
                             metas=metas)

        encoded_attributes = {'c1': Continuous, 'd1': Discrete,
                              'd2': Discrete, 'd3': Discrete}
        encoded_metas = {'c2': Continuous, 'd4': Discrete, }
        self.args = (self.domain, encoded_attributes, encoded_metas)

        handler = DomainContextHandler()
        handler.read_defaults = lambda: None
        self.handler = handler

    def test_encode_domain_with_match_none(self):
Exemplo n.º 18
0
def table_from_frame(df, class_name, *, force_nominal=False):
    """
    Convert pandas.DataFrame to Orange.data.Table

    Parameters
    ----------
    df : pandas.DataFrame
    class_name : str
        Name of the column that becomes the class variable. The column is
        always encoded as a DiscreteVariable. Column names are converted to
        str before they are compared with `class_name`.
    force_nominal : boolean
        If True, interpret ALL string columns as nominal (DiscreteVariable).

    Returns
    -------
    Table
    """
    def _is_discrete(s):
        return (is_categorical_dtype(s) or is_object_dtype(s) and
                (force_nominal or s.nunique() < s.size**.666))

    def _is_datetime(s):
        if is_datetime64_any_dtype(s):
            return True
        try:
            if is_object_dtype(s):
                pd.to_datetime(s, infer_datetime_format=True)
                return True
        except Exception:  # pylint: disable=broad-except
            # deliberate best effort: anything that fails to parse is
            # simply not a datetime column
            pass
        return False

    def _as_discrete(name, s):
        # Encode the column as categorical: returns the variable and the
        # category codes, with missing values (code -1) replaced by NaN.
        discrete = s.astype('category').cat
        var = DiscreteVariable(name,
                               discrete.categories.astype(str).tolist())
        return var, discrete.codes.replace(-1, np.nan).values

    # If df index is not a simple RangeIndex (or similar), put it into data
    if not (df.index.is_integer() and (df.index.is_monotonic_increasing
                                       or df.index.is_monotonic_decreasing)):
        df = df.reset_index()

    attrs, metas, class_vars = [], [], []
    # Class columns are collected separately from attribute columns, so that
    # the columns of X always line up with `attrs`, wherever the class
    # column appears in the frame.
    X, Y, M = [], [], []

    # Iter over columns
    for name, s in df.items():
        name = str(name)
        if name == class_name:
            var, codes = _as_discrete(name, s)
            class_vars.append(var)
            Y.append(codes)
        elif _is_discrete(s):
            var, codes = _as_discrete(name, s)
            attrs.append(var)
            X.append(codes)
        elif _is_datetime(s):
            tvar = TimeVariable(name)
            attrs.append(tvar)
            s = pd.to_datetime(s, infer_datetime_format=True)
            X.append(
                s.astype('str').replace('NaT', np.nan).map(tvar.parse).values)
        elif is_numeric_dtype(s):
            attrs.append(ContinuousVariable(name))
            X.append(s.values)
        else:
            metas.append(StringVariable(name))
            M.append(s.values.astype(object))

    return Table.from_numpy(
        Domain(attrs, class_vars, metas),
        np.column_stack(X) if X else np.empty((df.shape[0], 0)),
        np.column_stack(Y) if Y else None,
        np.column_stack(M) if M else None)
Exemplo n.º 19
0
 def test_value_from_discrete_substring(self):
     """Check the lookup table built from self.arr and self.patterns."""
     variable = DiscreteVariable("x", values=self.arr)
     transformation = ValueFromDiscreteSubstring(variable, self.patterns)
     expected = [0, 1, 2, 0, 3]
     np.testing.assert_equal(transformation.lookup_table, expected)
Exemplo n.º 20
0
from orangewidget.widget import StateInfo

from Orange.data import Table, ContinuousVariable, DiscreteVariable, Domain
from Orange.widgets.settings import ContextSetting
from Orange.widgets.utils import vartype
from Orange.widgets.utils.state_summary import format_summary_details
from Orange.widgets.tests.base import WidgetTest
from Orange.widgets.data.owselectcolumns \
    import OWSelectAttributes, VariablesListItemModel, \
    SelectAttributesDomainContextHandler
from Orange.widgets.data.owrank import OWRank
from Orange.widgets.widget import AttributeList

# Results of vartype for a continuous and for a discrete variable.
Continuous = vartype(ContinuousVariable("c"))
Discrete = vartype(DiscreteVariable("d"))


class TestSelectAttributesDomainContextHandler(TestCase):
    def setUp(self):
        self.domain = Domain(attributes=[
            ContinuousVariable('c1'),
            DiscreteVariable('d1', values='abc'),
            DiscreteVariable('d2', values='def')
        ],
                             class_vars=[DiscreteVariable('d3', values='ghi')],
                             metas=[
                                 ContinuousVariable('c2'),
                                 DiscreteVariable('d4', values='jkl')
                             ])
        self.args = (self.domain, {
Exemplo n.º 21
0
 def test_discrete_rename(self):
     """Renaming categories yields new values computed from the original."""
     original = DiscreteVariable("D", values=("a", "b"))
     mapping = CategoriesMapping((("a", "A"), ("b", "B")))
     renamed = apply_transform_var(original, [mapping])
     self.assertSequenceEqual(renamed.values, ["A", "B"])
     self.assertIs(renamed.compute_value.variable, original)
Exemplo n.º 22
0
class TestSparseTablePandas(TestTablePandas):
    """Variant of TestTablePandas whose table stores X and Y as sparse."""

    # two continuous and two discrete features
    features = (
        ContinuousVariable(name="c2"),
        ContinuousVariable(name="Continuous Feature 2"),
        DiscreteVariable(name="d1", values=("0", "1")),
        DiscreteVariable(name="Discrete Feature 2",
                         values=("value1", "value2")),
    )

    # one continuous and one discrete class variable
    class_vars = (ContinuousVariable(name="Continuous Class"),
                  DiscreteVariable(name="Discrete Class", values=("m", "f")))

    # seven rows, one column per feature
    feature_data = (
        (1, 0, 0, 0),
        (0, 1, 0, 0),
        (0, 1, 1, 0),
        (0, 0, 0, 0),
        (0, 1, 1, 0),
        (0, 0, 0, 0),
        (0, 1, 1, 0),
    )

    # seven rows, one column per class variable
    class_data = (
        (1, 0),
        (0, 1),
        (1, 0),
        (0, 1),
        (1, 0),
        (0, 1),
        (1, 0),
    )

    def setUp(self):
        """Create the sparse table and the array comparison helper."""
        self.domain = Domain(attributes=self.features,
                             class_vars=self.class_vars)
        # build a dense table first and wrap its X and Y into CSR matrices
        dense = Table.from_numpy(
            self.domain,
            np.array(self.feature_data),
            np.array(self.class_data),
        )
        weights = np.array([1, 0, 1, 0, 1, 1, 1])
        self.table = Table.from_numpy(self.domain,
                                      csr_matrix(dense.X),
                                      csr_matrix(dense.Y),
                                      W=weights)

        def arreq(t1, t2):
            # two sparse arguments are compared by asserting that no entry
            # differs; anything else goes through np.array_equal
            if sp.issparse(t1) and sp.issparse(t2):
                return self.assertEqual((t1 != t2).nnz, 0)
            return np.array_equal(t1, t2)

        self.__arreq__ = arreq

    def test_to_dense(self):
        """Densifying the data frame keeps the metadata and the data."""
        df = self.table.X_df

        self.assertIsInstance(df, OrangeDataFrame)

        ddf = df.sparse.to_dense()
        preserved = ("index", "orange_variables", "orange_attributes",
                     "orange_role", "orange_weights")
        for attr in preserved:
            np.testing.assert_array_equal(getattr(df, attr),
                                          getattr(ddf, attr))

        table = self.table.to_dense()
        table2 = ddf.to_orange_table()

        for attr in ("X", "ids", "W", "attributes"):
            np.testing.assert_array_equal(getattr(table2, attr),
                                          getattr(table, attr))
Exemplo n.º 23
0
 def test_init(self):
     """The feature passed to the constructor is stored unchanged."""
     fold_var = DiscreteVariable(name="fold", values="abc")
     validation = CrossValidationFeature(feature=fold_var)
     self.assertIs(validation.feature, fold_var)
Exemplo n.º 24
0
                         output_csv.getvalue().splitlines())

    def test_repr_value(self):
        """repr_val of a Value wrapping 416.3 gives the string '416.3'."""
        # https://github.com/biolab/orange3/pull/1760
        time_var = TimeVariable('time')
        wrapped = Value(time_var, 416.3)
        self.assertEqual(time_var.repr_val(wrapped), '416.3')


# Each call below passes create_pickling_tests a name followed by
# (label, factory) pairs; every factory builds the variable for one case.
# NOTE(review): create_pickling_tests is defined outside this excerpt;
# presumably it generates a test case class per call — confirm.
PickleContinuousVariable = create_pickling_tests(
    "PickleContinuousVariable",
    ("with_name", lambda: ContinuousVariable(name="Feature 0")),
)

# Discrete variables: name only, integer values, string values, ordered, and
# with a base value.
PickleDiscreteVariable = create_pickling_tests(
    "PickleDiscreteVariable",
    ("with_name", lambda: DiscreteVariable(name="Feature 0")),
    ("with_int_values",
     lambda: DiscreteVariable(name="Feature 0", values=[1, 2, 3])),
    ("with_str_value",
     lambda: DiscreteVariable(name="Feature 0", values=["F", "M"])),
    ("ordered", lambda: DiscreteVariable(
        name="Feature 0", values=["F", "M"], ordered=True)),
    ("with_base_value", lambda: DiscreteVariable(
        name="Feature 0", values=["F", "M"], base_value=0)))

# String variables: name only.
PickleStringVariable = create_pickling_tests(
    "PickleStringVariable",
    ("with_name", lambda: StringVariable(name="Feature 0")))


@variabletest(DiscreteVariable)
Exemplo n.º 25
0
import warnings
from distutils.version import LooseVersion
from unittest import TestCase
from unittest.mock import Mock

import Orange
from Orange.data import Domain, DiscreteVariable
from Orange.data import ContinuousVariable
from Orange.util import OrangeDeprecationWarning
from Orange.widgets.settings import DomainContextHandler, ContextSetting
from Orange.widgets.utils import vartype

# Results of vartype shifted by 100; setUp below subtracts the 100 again for
# the entries it lists under attributes.
# NOTE(review): the reason for the offset is not visible in this excerpt -
# confirm against the context handler's encoding.
Continuous = 100 + vartype(ContinuousVariable("x"))
Discrete = 100 + vartype(DiscreteVariable("x"))


class TestDomainContextHandler(TestCase):
    def setUp(self):
        self.domain = Domain(attributes=[
            ContinuousVariable('c1'),
            DiscreteVariable('d1', values='abc'),
            DiscreteVariable('d2', values='def')
        ],
                             class_vars=[DiscreteVariable('d3', values='ghi')],
                             metas=[
                                 ContinuousVariable('c2'),
                                 DiscreteVariable('d4', values='jkl')
                             ])
        self.args = (self.domain, {
            'c1': Continuous - 100,
            'd1': Discrete - 100,
Exemplo n.º 26
0
# Continuous column in which all five values are NaN.
continuous_all_missing = VarDataPair(
    ContinuousVariable('continuous_all_missing'),
    np.array([np.nan] * 5, dtype=float),
)
# Continuous column holding the same value five times.
continuous_same = VarDataPair(
    ContinuousVariable('continuous_same'),
    np.array([3] * 5, dtype=float),
)
# All continuous variations, collected in one list.
continuous = [
    continuous_full, continuous_missing, continuous_all_missing,
    continuous_same
]

# Unordered discrete variable variations
# The data hold indices into the variable's values ['r', 'g', 'b'].
rgb_full = VarDataPair(
    DiscreteVariable('rgb_full', values=['r', 'g', 'b']),
    np.array([0, 1, 1, 1, 2], dtype=float),
)
# One entry is NaN.
rgb_missing = VarDataPair(
    DiscreteVariable('rgb_missing', values=['r', 'g', 'b']),
    np.array([0, 1, 1, np.nan, 2], dtype=float),
)
# All entries are NaN.
rgb_all_missing = VarDataPair(
    DiscreteVariable('rgb_all_missing', values=['r', 'g', 'b']),
    np.array([np.nan] * 5, dtype=float),
)
# Only index 1 ('g') occurs; the remaining entries are NaN, so the values
# 'r' and 'b' never appear in the data.
rgb_bins_missing = VarDataPair(
    DiscreteVariable('rgb_bins_missing', values=['r', 'g', 'b']),
    np.array([np.nan, 1, 1, 1, np.nan], dtype=float),
)
rgb_same = VarDataPair(
Exemplo n.º 27
0
 def test_no_nonstringvalues(self):
     """Non-string values raise TypeError at construction and on add."""
     with self.assertRaises(TypeError):
         DiscreteVariable("foo", values=("a", 42))
     variable = DiscreteVariable("foo", values=("a", "b", "c"))
     with self.assertRaises(TypeError):
         variable.add_value(42)
Exemplo n.º 28
0
    def test_colors_diff_domain(self):
        """
        Check the colors the widget selects when the two predictors were
        built on domains with different class variables.
        """
        # pylint: disable=protected-access
        widget = self.widget
        self.send_signal(widget.Inputs.data, self.iris)

        def class_domain(values):
            # Iris attributes with a fresh class variable of given values.
            source = self.iris.domain
            return Domain(
                source.attributes,
                DiscreteVariable(source.class_var.name, values)
            )

        def colors_for(first_domain, second_domain, limit=100):
            # Fit a constant predictor on the data transformed to each
            # domain, send both to the widget and return the first table
            # together with the colors the widget reports.
            def rows():
                return self.iris if limit is None else self.iris[:limit]

            first = rows().transform(first_domain)
            second = rows().transform(second_domain)
            first_model = ConstantLearner()(first)
            second_model = ConstantLearner()(second)
            self.send_signal(widget.Inputs.predictors, first_model)
            self.send_signal(widget.Inputs.predictors, second_model, 1)
            return first, widget._get_colors()

        # case 1: two domains one subset other
        idom = self.iris.domain
        dom1 = class_domain(idom.class_var.values)
        dom2 = class_domain(idom.class_var.values[:2])
        iris1, colors = colors_for(dom1, dom2)
        np.testing.assert_array_equal(colors, iris1.domain.class_var.colors)

        # case 2: two domains one subset other - different color order
        idom = self.iris.domain
        colors = idom.class_var.colors[::-1]
        dom1 = class_domain(idom.class_var.values)
        dom2 = class_domain(idom.class_var.values[:2])
        dom1.class_var.colors = colors
        dom2.class_var.colors = colors[:2]
        iris1, colors = colors_for(dom1, dom2)
        np.testing.assert_array_equal(colors, iris1.domain.class_var.colors)

        # case 3: domain color, values miss-match - use default colors
        idom = self.iris.domain
        dom1 = class_domain(idom.class_var.values)
        dom2 = class_domain(idom.class_var.values)
        dom1.class_var.colors = dom1.class_var.colors[::-1]
        _, colors = colors_for(dom1, dom2, limit=None)
        np.testing.assert_array_equal(colors, ColorPaletteGenerator.palette(3))

        # case 4: two domains different values order, matching colors
        idom = self.iris.domain
        # this way we know that default colors are not used
        colors = ColorPaletteGenerator.palette(5)[2:]
        dom1 = class_domain(idom.class_var.values)
        dom2 = class_domain(idom.class_var.values[::-1])
        dom1.class_var.colors = colors
        dom2.class_var.colors = colors[::-1]  # colors mixed same than values
        iris1, colors = colors_for(dom1, dom2)
        np.testing.assert_array_equal(colors, iris1.domain.class_var.colors)
Exemplo n.º 29
0
def tool_tip(value):
    """Return the tool tip text for a ``(value, dist)`` pair.

    The pair is unpacked into the value proper and its distribution.  When
    the distribution is ``None`` only the value is shown; otherwise both are
    rendered with ``str`` and separated by a single space.
    """
    shown, dist = value
    if dist is None:
        return str(shown)
    return "{!s} {!s}".format(shown, dist)


if __name__ == "__main__":  # pragma: no cover
    filename = "iris.tab"
    iris = Orange.data.Table(filename)
    idom = iris.domain
    dom = Domain(
        idom.attributes,
        DiscreteVariable(idom.class_var.name, idom.class_var.values[1::-1]))
    iris2 = iris[:100].transform(dom)

    def pred_error(data, *args, **kwargs):
        raise ValueError

    pred_error.domain = iris.domain
    pred_error.name = "To err is human"

    if iris.domain.has_discrete_class:
        predictors_ = [
            Orange.classification.SVMLearner(probability=True)(iris2),
            Orange.classification.LogisticRegressionLearner()(iris), pred_error
        ]
    elif iris.domain.has_continuous_class:
        predictors_ = [
Exemplo n.º 30
0
class TestInstance(unittest.TestCase):
    """Unit tests for ``Instance``."""

    # Ten attribute names; ``create_domain`` turns plain names into
    # ContinuousVariable objects.
    attributes = ["Feature %i" % i for i in range(10)]
    # A single class-variable name.
    class_vars = ["Class %i" % i for i in range(1)]
    # One meta variable of each kind: discrete (built from the string "XYZ"),
    # continuous and string.
    metas = [
        DiscreteVariable("Meta 1", values="XYZ"),
        ContinuousVariable("Meta 2"),
        StringVariable("Meta 3")
    ]

    def mock_domain(self, with_classes=False, with_metas=False):
        """Return a ``MagicMock`` specced on ``Domain``.

        The mock always exposes ``self.attributes``.  ``self.class_vars`` and
        ``self.metas`` are used only when the matching flag is true; otherwise
        that part is an empty list.  ``variables`` is the attributes followed
        by the chosen class variables.
        """
        parts = {
            "attributes": self.attributes,
            "class_vars": [],
            "metas": [],
        }
        if with_classes:
            parts["class_vars"] = self.class_vars
        if with_metas:
            parts["metas"] = self.metas
        parts["variables"] = parts["attributes"] + parts["class_vars"]
        return MagicMock(Domain, **parts)

    def create_domain(self, attributes=(), classes=(), metas=()):
        """Build a ``Domain`` from a mix of variable names and variables.

        Strings among ``attributes`` and ``classes`` become
        ``ContinuousVariable`` objects; strings among ``metas`` become
        ``DiscreteVariable`` objects whose values are the strings "0" to "4".
        Anything that is not a string is passed through unchanged.
        """
        def as_continuous(item):
            if isinstance(item, str):
                return ContinuousVariable(name=item)
            return item

        def as_discrete(item):
            if isinstance(item, str):
                return DiscreteVariable(name=item, values=map(str, range(5)))
            return item

        return Domain(
            [as_continuous(attr) for attr in attributes],
            [as_continuous(cls_var) for cls_var in classes],
            [as_discrete(meta) for meta in metas])

    def test_init_x_no_data(self):
        """Without data, an attributes-only instance holds only NaN values."""
        dom = self.mock_domain()
        instance = Instance(dom)

        self.assertIsInstance(instance, Instance)
        self.assertIs(instance.domain, dom)

        no_values = (0,)
        self.assertEqual(instance._x.shape, (len(self.attributes),))
        self.assertEqual(instance._y.shape, no_values)
        self.assertEqual(instance._metas.shape, no_values)
        self.assertTrue(all(map(isnan, instance._x)))

    def test_init_xy_no_data(self):
        """Without data, attribute and class values are all NaN."""
        dom = self.mock_domain(with_classes=True)
        instance = Instance(dom)

        self.assertIsInstance(instance, Instance)
        self.assertIs(instance.domain, dom)

        self.assertEqual(instance._x.shape, (len(self.attributes),))
        self.assertEqual(instance._y.shape, (len(self.class_vars),))
        self.assertEqual(instance._metas.shape, (0,))
        self.assertTrue(all(map(isnan, instance._x)))
        self.assertTrue(all(map(isnan, instance._y)))

    def test_init_xym_no_data(self):
        """Without data, attributes and classes are NaN and metas Unknown."""
        dom = self.mock_domain(with_classes=True, with_metas=True)
        instance = Instance(dom)

        self.assertIsInstance(instance, Instance)
        self.assertIs(instance.domain, dom)

        self.assertEqual(instance._x.shape, (len(self.attributes),))
        self.assertEqual(instance._y.shape, (len(self.class_vars),))
        self.assertEqual(instance._metas.shape, (3,))
        self.assertTrue(all(map(isnan, instance._x)))
        self.assertTrue(all(map(isnan, instance._y)))

        # Any FutureWarning raised while comparing the arrays is silenced.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", FutureWarning)
            unknown_metas = np.array([Unknown] * 3, dtype=object)
            assert_array_equal(instance._metas, unknown_metas)

    def test_init_x_arr(self):
        """Attribute-only instances can be built from numpy arrays."""
        dom = self.create_domain(["x", DiscreteVariable("g", values="MF")])
        values = np.array([42, 0])
        instance = Instance(dom, values)
        assert_array_equal(instance._x, values)
        no_values = (0,)
        self.assertEqual(instance._y.shape, no_values)
        self.assertEqual(instance._metas.shape, no_values)

        # An empty domain paired with an empty array: every part is empty.
        instance = Instance(self.create_domain(), np.empty((0,)))
        self.assertEqual(instance._x.shape, no_values)
        self.assertEqual(instance._y.shape, no_values)
        self.assertEqual(instance._metas.shape, no_values)

    def test_init_x_list(self):
        """Attribute-only instances can be built from plain Python lists."""
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")])
        lst = [42, 0]
        # Hand over the list itself: wrapping it in an array first would merely
        # repeat test_init_x_arr and leave list input untested.
        inst = Instance(domain, lst)
        assert_array_equal(inst._x, np.array(lst))
        self.assertEqual(inst._y.shape, (0, ))
        self.assertEqual(inst._metas.shape, (0, ))

        # An empty domain paired with an empty list: every part is empty.
        domain = self.create_domain()
        inst = Instance(domain, [])
        self.assertEqual(inst._x.shape, (0, ))
        self.assertEqual(inst._y.shape, (0, ))
        self.assertEqual(inst._metas.shape, (0, ))

    def test_init_xy_arr(self):
        """An array covering attributes and class is split into x and y."""
        dom = self.create_domain(
            attributes=["x", DiscreteVariable("g", values="MF")],
            classes=[DiscreteVariable("y", values="ABC")])
        values = np.array([42, 0, 1])
        instance = Instance(dom, values)

        # The first two entries are attributes, the last one is the class.
        assert_array_equal(instance._x, values[:2])
        self.assertEqual(instance._y.shape, (1,))
        self.assertEqual(instance._y[0], 1)
        self.assertEqual(instance._metas.shape, (0,))

    def test_init_xy_list(self):
        """Instances with a class can be built from a list of raw values."""
        domain = self.create_domain(
            ["x", DiscreteVariable("g", values="MF")],
            [DiscreteVariable("y", values="ABC")])
        lst = [42, "M", "C"]
        # Build from the list itself so that list input, including the
        # conversion of value names, is what gets tested.  "M" is the first
        # value of g and "C" the third value of y, hence indices 0 and 2.
        inst = Instance(domain, lst)
        assert_array_equal(inst._x, np.array([42, 0]))
        self.assertEqual(inst._y.shape, (1, ))
        self.assertEqual(inst._y[0], 2)
        self.assertEqual(inst._metas.shape, (0, ))

    def test_init_xym_arr(self):
        """An object array with attributes, class and metas is converted."""
        dom = self.create_domain(
            ["x", DiscreteVariable("g", values="MF")],
            [DiscreteVariable("y", values="ABC")], self.metas)
        raw = np.array([42, "M", "B", "X", 43, "Foo"], dtype=object)
        instance = Instance(dom, raw)

        self.assertIsInstance(instance, Instance)
        self.assertIs(instance.domain, dom)

        # Two attributes, one class value and three metas.
        self.assertEqual(instance._x.shape, (2,))
        self.assertEqual(instance._y.shape, (1,))
        self.assertEqual(instance._metas.shape, (3,))

        # Discrete values are expected as indices into the variable's values.
        assert_array_equal(instance._x, np.array([42, 0]))
        self.assertEqual(instance._y[0], 1)
        expected_metas = np.array([0, 43, "Foo"], dtype=object)
        assert_array_equal(instance._metas, expected_metas)

    def test_init_xym_list(self):
        """A list with attributes, class and metas is converted."""
        dom = self.create_domain(
            ["x", DiscreteVariable("g", values="MF")],
            [DiscreteVariable("y", values="ABC")], self.metas)
        raw = [42, "M", "B", "X", 43, "Foo"]
        instance = Instance(dom, raw)

        self.assertIsInstance(instance, Instance)
        self.assertIs(instance.domain, dom)

        # Two attributes, one class value and three metas.
        self.assertEqual(instance._x.shape, (2,))
        self.assertEqual(instance._y.shape, (1,))
        self.assertEqual(instance._metas.shape, (3,))

        # Discrete values are expected as indices into the variable's values.
        assert_array_equal(instance._x, np.array([42, 0]))
        self.assertEqual(instance._y[0], 1)
        expected_metas = np.array([0, 43, "Foo"], dtype=object)
        assert_array_equal(instance._metas, expected_metas)

    def test_init_inst(self):
        """An instance can be built from another one, even across domains."""
        dom = self.create_domain(
            ["x", DiscreteVariable("g", values="MF")],
            [DiscreteVariable("y", values="ABC")], self.metas)
        source = Instance(dom, [42, "M", "B", "X", 43, "Foo"])

        # Same domain: all values are carried over.
        same = Instance(dom, source)
        assert_array_equal(same._x, np.array([42, 0]))
        self.assertEqual(same._y[0], 1)
        assert_array_equal(
            same._metas, np.array([0, 43, "Foo"], dtype=object))

        # Different domain: variables are shuffled between attributes and
        # metas, and "z" and "w" are new, so their values are Unknown.
        other_dom = self.create_domain(
            ["z", dom[1], self.metas[1]],
            dom.class_vars,
            [self.metas[0], "w", dom[0]])
        converted = Instance(other_dom, source)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", FutureWarning)
            assert_array_equal(converted._x, np.array([Unknown, 0, 43]))
            self.assertEqual(converted._y[0], 1)
            assert_array_equal(
                converted._metas, np.array([0, Unknown, 42], dtype=object))

    def test_get_item(self):
        """Values can be read by position, by name and by variable."""
        domain = self.create_domain(
            ["x", DiscreteVariable("g", values="MF")],
            [DiscreteVariable("y", values="ABC")], self.metas)
        vals = [42, "M", "B", "X", 43, "Foo"]
        inst = Instance(domain, vals)

        # (position, name, variable, expected value) for an attribute of each
        # kind, the class variable and a meta (addressed by a negative index).
        lookups = [
            (0, "x", domain[0], 42),
            (1, "g", domain[1], "M"),
            (2, "y", domain.class_var, "B"),
            (-2, "Meta 2", self.metas[1], 43),
        ]
        for position, name, variable, expected in lookups:
            self.assertIsInstance(inst[position], Value)
            for key in (position, name, variable):
                self.assertEqual(inst[key], expected)

        # Reading through a key that is not in the domain must fail.  These
        # checks read the item (this is the getter test); assignment through
        # an unknown key is exercised in test_set_item.
        with self.assertRaises(ValueError):
            _ = inst["asdf"]
        with self.assertRaises(ValueError):
            _ = inst[ContinuousVariable("asdf")]

    def test_list(self):
        """``Instance.list`` gives all values, metas included, as a list."""
        dom = self.create_domain(
            ["x", DiscreteVariable("g", values="MF")],
            [DiscreteVariable("y", values="ABC")], self.metas)
        instance = Instance(dom, [42, "M", "B", "X", 43, "Foo"])

        as_list = instance.list
        self.assertIsInstance(as_list, list)
        self.assertEqual(as_list, [42, "M", "B", "X", 43, "Foo"])

        # The list has six entries, more than len() of the instance reports.
        n_items = len(as_list)
        self.assertGreater(n_items, len(instance))
        self.assertEqual(n_items, 6)

    def test_set_item(self):
        """Values can be assigned by position, by name and by variable."""
        dom = self.create_domain(
            ["x", DiscreteVariable("g", values="MF")],
            [DiscreteVariable("y", values="ABC")], self.metas)
        instance = Instance(dom, [42, "M", "B", "X", 43, "Foo"])

        def assign_and_check(key, new_value, position):
            # Assign through ``key`` and read the value back by position.
            instance[key] = new_value
            self.assertEqual(instance[position], new_value)

        # Continuous attribute.
        assign_and_check(0, 43, 0)
        assign_and_check("x", 44, 0)
        assign_and_check(dom[0], 45, 0)

        # Discrete attribute; unknown values and unknown keys are rejected.
        assign_and_check(1, "F", 1)
        assign_and_check("g", "M", 1)
        with self.assertRaises(ValueError):
            instance[1] = "N"
        with self.assertRaises(ValueError):
            instance["asdf"] = 42

        # Class variable.
        assign_and_check(2, "C", 2)
        assign_and_check("y", "A", 2)
        assign_and_check(dom.class_var, "B", 2)

        # First meta, addressed by the negative index -1.
        assign_and_check(-1, "Y", -1)
        assign_and_check("Meta 1", "Z", -1)
        assign_and_check(dom.metas[0], "X", -1)

    def test_str(self):
        """``str`` shows attributes, then `` | `` classes, then ``{metas}``."""
        def check(dom, data, expected):
            # Build the instance and compare its string form.
            self.assertEqual(str(Instance(dom, data)), expected)

        # Attributes only.
        check(self.create_domain(["x", DiscreteVariable("g", values="MF")]),
              [42, 0],
              "[42.000, M]")

        # Attributes and a class.
        check(self.create_domain(["x", DiscreteVariable("g", values="MF")],
                                 [DiscreteVariable("y", values="ABC")]),
              [42, "M", "B"],
              "[42.000, M | B]")

        # Attributes, a class and metas.
        check(self.create_domain(["x", DiscreteVariable("g", values="MF")],
                                 [DiscreteVariable("y", values="ABC")],
                                 self.metas),
              [42, "M", "B", "X", 43, "Foo"],
              "[42.000, M | B] {X, 43.000, Foo}")

        # A class and metas, no attributes.
        check(self.create_domain([], [DiscreteVariable("y", values="ABC")],
                                 self.metas),
              ["B", "X", 43, "Foo"],
              "[ | B] {X, 43.000, Foo}")

        # Metas only.
        check(self.create_domain([], [], self.metas),
              ["X", 43, "Foo"],
              "[] {X, 43.000, Foo}")

        # Ten continuous attributes: three decimals are printed at first ...
        n_attrs = len(self.attributes)
        dom = self.create_domain(self.attributes)
        instance = Instance(dom, range(n_attrs))
        with_decimals = ", ".join(map("{:.3f}".format, range(n_attrs)))
        self.assertEqual(str(instance), "[{}]".format(with_decimals))

        # ... and none once number_of_decimals is set to 0.
        for variable in dom:
            variable.number_of_decimals = 0
        without_decimals = ", ".join(map("{}".format, range(n_attrs)))
        self.assertEqual(str(instance), "[{}]".format(without_decimals))

    def test_repr(self):
        """``repr`` shows the first five values followed by an ellipsis."""
        dom = self.create_domain(self.attributes)
        n_attrs = len(self.attributes)
        instance = Instance(dom, range(n_attrs))
        with_decimals = "[0.000, 1.000, 2.000, 3.000, 4.000, ...]"
        self.assertEqual(repr(instance), with_decimals)

        # With number_of_decimals set to 0 the values print without decimals.
        for variable in dom:
            variable.number_of_decimals = 0
        without_decimals = "[0, 1, 2, 3, 4, ...]"
        self.assertEqual(repr(instance), without_decimals)

    def test_eq(self):
        """Instances are equal only when all of their values agree."""
        dom = self.create_domain(
            ["x", DiscreteVariable("g", values="MF")],
            [DiscreteVariable("y", values="ABC")], self.metas)
        raw = [42, "M", "B", "X", 43, "Foo"]
        reference = Instance(dom, raw)
        other = Instance(dom, raw)
        self.assertTrue(reference == other)
        self.assertTrue(other == reference)

        # Changing the first attribute, to a number and then to Unknown,
        # breaks equality.
        for changed in (43, Unknown):
            other[0] = changed
            self.assertFalse(reference == other)

        # A fresh copy that differs in the class or in any single meta is
        # not equal either.
        for key, changed in ((2, "C"), (-1, "Y"), (-2, "33"), (-3, "Bar")):
            other = Instance(dom, raw)
            other[key] = changed
            self.assertFalse(reference == other)

    def test_instance_id(self):
        """An explicit id is kept; instances built without one get distinct ids."""
        dom = self.create_domain(["x"])
        raw = [42]

        explicit = Instance(dom, raw, id=42)
        self.assertEqual(explicit.id, 42)

        # No id is passed for these two, and their ids must differ.
        first_auto = Instance(dom, raw)
        second_auto = Instance(dom, raw)
        self.assertNotEqual(first_auto.id, second_auto.id)