Пример #1
0
 def setUp(self):
     self.data = [
         [1, 2, 3, None, 'm'],
         [2, 3, 1, 4, 'f'],
         [None, None, None, None, None],
         [7, None, 3, None, 'f'],
     ]
     conn, self.table_name = self.create_sql_table(self.data)
     self.table = SqlTable(conn, self.table_name, inspect_values=True)
Пример #2
0
    def test_other(self):
        table = np.array([
            'bcd4d9c0-361e-bad4-7ceb-0d171cdec981',
            '544b7ddc-d861-0201-81c8-9f7ad0bbf531',
            'b35a10f7-7901-f313-ec16-5ad9778040a6',
            'b267c4be-4a26-60b5-e664-737a90a40e93'
        ]).reshape(-1, 1)
        conn, table_name = self.create_sql_table(table, ['uuid'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstMetaIsInstance(sql_table, StringVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstMetaIsInstance(sql_table, StringVariable)

        filters = filter.Values(
            [filter.FilterString(-1, filter.FilterString.Equal, 'foo')])
        self.assertEqual(len(filters(sql_table)), 0)
Пример #3
0
 def setUp(self):
     self.data = [
         [1, 2, 3, 'a', 'm'],
         [2, None, 1, 'a', 'f'],
         [None, 3, 1, 'b', None],
         [2, 2, 3, 'b', 'f'],
     ]
     self.conn, self.table_name = self.create_sql_table(self.data)
     self.table = SqlTable(self.conn, self.table_name, inspect_values=True)
Пример #4
0
 def setUp(self):
     self.data = [
         [1, 2, 3, 'a', 'm'],
         [2, None, 1, 'a', 'f'],
         [None, 3, 1, 'b', None],
         [2, 2, 3, 'b', 'f'],
     ]
     self.table_uri = self.create_sql_table(self.data)
     self.table = SqlTable(self.table_uri)
Пример #5
0
    def test_distributions(self):
        iris = SqlTable(self.conn, self.iris, inspect_values=True)

        dists = get_distributions(iris)
        self.assertEqual(len(dists), 5)
        dist = dists[0]
        self.assertAlmostEqual(dist.min(), 4.3)
        self.assertAlmostEqual(dist.max(), 7.9)
        self.assertAlmostEqual(dist.mean(), 5.8, 1)
Пример #6
0
    def sql_table_from_data(self, data, guess_values=True):
        from Orange.data.sql.backend.postgres import Psycopg2Backend
        assert Psycopg2Backend.connection_pool is not None

        table_name = self._create_sql_table(data)
        yield SqlTable(connection_params(),
                       table_name,
                       inspect_values=guess_values)
        self.drop_sql_table(table_name)
Пример #7
0
 def test_PCA(self):
     iris_v = ['Iris-setosa', 'Iris-virginica', 'Iris-versicolor']
     table = SqlTable(self.conn,
                      self.iris,
                      type_hints=Domain([],
                                        DiscreteVariable("iris",
                                                         values=iris_v)))
     for batch_size in (50, 500):
         rpca = RemotePCA(table, batch_size, 20)
         self.assertEqual(rpca.components_.shape, (4, 4))
Пример #8
0
 def test_download_data(self):
     mat = np.random.randint(0, 2, (20, 3))
     conn, table_name = self.create_sql_table(mat)
     for member in ('X', 'Y', 'metas', 'W', 'ids'):
         sql_table = SqlTable(conn, table_name,
                              type_hints=Domain([], DiscreteVariable(
                                 name='col2', values=['0', '1', '2'])))
         self.assertFalse(getattr(sql_table, member) is None)
     # has all necessary class members to create a standard Table
     Table(sql_table.domain, sql_table)
Пример #9
0
    def test_contingencies(self):
        iris = SqlTable(self.conn, self.iris, inspect_values=True)
        iris.domain = Domain(iris.domain[:2] + (EqualWidth()(iris, iris.domain['sepal width']),),
                             iris.domain['iris'])

        conts = get_contingencies(iris)
        self.assertEqual(len(conts), 3)
        self.assertIsInstance(conts[0], Continuous)
        self.assertIsInstance(conts[1], Continuous)
        self.assertIsInstance(conts[2], Discrete)
Пример #10
0
 def test_XY_small(self):
     mat = np.random.randint(0, 2, (20, 3))
     conn, table_name = self.create_sql_table(mat)
     sql_table = SqlTable(conn,
                          table_name,
                          type_hints=Domain([],
                                            DiscreteVariable(
                                                name='col2',
                                                values=['0', '1', '2'])))
     assert_almost_equal(sql_table.X, mat[:, :2])
     assert_almost_equal(sql_table.Y.flatten(), mat[:, 2])
Пример #11
0
    def test_meta_type_hints(self):
        iris = SqlTable(
            self.conn,
            self.iris,
            type_hints=Domain([], metas=[self.IRIS_VARIABLE]),
        )

        self.assertEqual(len(iris.domain.metas), 1)
        self.assertEqual(iris.domain.metas[0].name, "iris")
        np.testing.assert_array_equal(iris.metas.flatten(),
                                      [0] * 50 + [2] * 50 + [1] * 50)
Пример #12
0
    def open_table(self):
        if self.tablecombo.currentIndex() == 0:
            return

        self.table = self.tablecombo.currentText()

        table = SqlTable(host=self.host,
                         database=self.database,
                         user=self.username,
                         password=self.password,
                         table=self.table)
        self.send("Data", table)
Пример #13
0
 def setUpDB(self):
     self.data = [
         [1, 2, 3, None, 'm'],
         [2, 3, 1, 4, 'f'],
         [None, None, None, None, None],
         [7, None, 3, None, 'f'],
     ]
     self.conn, self.table_name = self.create_sql_table(self.data)
     table = SqlTable(self.conn, self.table_name, inspect_values=True)
     variables = table.domain.variables
     new_table = table.copy()
     new_table.domain = domain.Domain(variables[:-1], variables[-1:])
     self.table = new_table
Пример #14
0
 def setUp(self):
     self.data = [
         [1, 2, 3, None, 'm'],
         [2, 3, 1, 4, 'f'],
         [None, None, None, None, None],
         [7, None, 3, None, 'f'],
     ]
     self.table_uri = self.create_sql_table(self.data)
     table = SqlTable(self.table_uri)
     variables = table.domain.variables
     new_table = table.copy()
     new_table.domain = domain.Domain(variables[:-1], variables[-1:])
     self.table = new_table
Пример #15
0
    def test_query_subset_of_attributes(self):
        table = SqlTable(self.conn, self.iris)
        attributes = [
            self._mock_attribute("sepal length"),
            self._mock_attribute("sepal width"),
            self._mock_attribute("double width", '2 * "sepal width"')
        ]
        results = list(table._query(attributes))

        self.assertSequenceEqual(results[:5],
                                 [(5.1, 3.5, 7.0), (4.9, 3.0, 6.0),
                                  (4.7, 3.2, 6.4), (4.6, 3.1, 6.2),
                                  (5.0, 3.6, 7.2)])
Пример #16
0
 def test_PCA(self):
     table = SqlTable(connection_params(),
                      'iris',
                      type_hints=Domain([],
                                        DiscreteVariable(
                                            "iris",
                                            values=[
                                                'Iris-setosa',
                                                'Iris-virginica',
                                                'Iris-versicolor'
                                            ])))
     for batch_size in (50, 500):
         rpca = RemotePCA(table, batch_size, 10)
         self.assertEqual(rpca.components_.shape, (4, 4))
Пример #17
0
 def test_download_data(self):
     mat = np.random.randint(0, 2, (20, 3))
     conn, table_name = self.create_sql_table(mat)
     for member in ("X", "Y", "metas", "W", "ids"):
         sql_table = SqlTable(
             conn,
             table_name,
             type_hints=Domain([],
                               DiscreteVariable(name="col2",
                                                values=["0", "1", "2"])),
         )
         self.assertFalse(getattr(sql_table, member) is None)
     # has all necessary class members to create a standard Table
     Table(sql_table.domain, sql_table)
Пример #18
0
    def test_meta_char(self):
        table = np.array(list('ABCDEFGHIJKLMNOPQRSTUVW')).reshape(-1, 1)
        conn, table_name = self.create_sql_table(table, ['char(1)'])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstMetaIsInstance(sql_table, StringVariable)

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstMetaIsInstance(sql_table, StringVariable)
        self.drop_sql_table(table_name)

        # test if NULL is transformed to emtpy string
        table = np.array(list("ABCDEFGHIJKLMNOPQRSTUVW") + [None]).reshape(
            -1, 1)
        conn, table_name = self.create_sql_table(table, ["char(1)"])

        sql_table = SqlTable(conn, table_name, inspect_values=False)
        self.assertFirstMetaIsInstance(sql_table, StringVariable)
        self.assertEqual("", sql_table.metas[-1, 0])

        sql_table = SqlTable(conn, table_name, inspect_values=True)
        self.assertFirstMetaIsInstance(sql_table, StringVariable)
        self.assertEqual("", sql_table.metas[-1, 0])
        self.drop_sql_table(table_name)
Пример #19
0
 def setUpDB(self):
     self.data = [
         [w] for w in "Lorem ipsum dolor sit amet, consectetur adipiscing"
         "elit. Vestibulum vel dolor nulla. Etiam elit lectus, mollis nec"
         "mattis sed, pellentesque in turpis. Vivamus non nisi dolor. Etiam"
         "lacinia dictum purus, in ullamcorper ante vulputate sed. Nullam"
         "congue blandit elementum. Donec blandit laoreet posuere. Proin"
         "quis augue eget tortor posuere mollis. Fusce vestibulum bibendum"
         "neque at convallis. Donec iaculis risus volutpat malesuada"
         "vehicula. Ut cursus tempor massa vulputate lacinia. Pellentesque"
         "eu tortor sed diam placerat porttitor et volutpat risus. In"
         "vulputate rutrum lacus ac sagittis. Suspendisse interdum luctus"
         "sem auctor commodo.".split(' ')] + [[None], [None]]
     self.conn, self.table_name = self.create_sql_table(self.data)
     self.table = SqlTable(self.conn, self.table_name)
Пример #20
0
 def test_XY_large(self):
     mat = np.random.randint(0, 2, (1020, 3))
     conn, table_name = self.create_sql_table(mat)
     sql_table = SqlTable(conn, table_name,
                          type_hints=Domain([], DiscreteVariable(
                              name='col2', values=['0', '1', '2'])))
     with self.assertRaises(ValueError):
         sql_table.X
     with self.assertRaises(ValueError):
         sql_table.Y
     with self.assertRaises(ValueError):
         sql_table.download_data(1019)
     sql_table.download_data()
     assert_almost_equal(sql_table.X, mat[:, :2])
     assert_almost_equal(sql_table.Y.flatten(), mat[:, 2])
Пример #21
0
    def test_list_tables_with_schema(self):
        with self.backend.execute_sql_query(
                "DROP SCHEMA IF EXISTS orange_tests CASCADE") as cur:
            cur.execute("CREATE SCHEMA orange_tests")
            cur.execute("CREATE TABLE orange_tests.efgh (id int)")
            cur.execute("INSERT INTO orange_tests.efgh (id) VALUES (1)")
            cur.execute("INSERT INTO orange_tests.efgh (id) VALUES (2)")

        try:
            tables = self.backend.list_tables("orange_tests")
            self.assertTrue(any([t.name == "efgh" for t in tables]))
            SqlTable(self.conn, tables[0], inspect_values=True)
        finally:
            with self.backend.execute_sql_query(
                    "DROP SCHEMA IF EXISTS orange_tests CASCADE"):
                pass
Пример #22
0
    def open_table(self):
        if self.tablecombo.currentIndex() <= 0:
            return

        if self.tablecombo.currentIndex() < self.tablecombo.count() - 1:
            self.table = self.tablecombo.currentText()
        else:
            self.table = self.sqltext.toPlainText()

        table = SqlTable(dict(host=self.host,
                              port=self.port,
                              database=self.database,
                              user=self.username,
                              password=self.password),
                         self.table,
                         inspect_values=False)
        sample = False
        if table.approx_len() > LARGE_TABLE and self.guess_values:
            confirm = QMessageBox(self)
            confirm.setIcon(QMessageBox.Warning)
            confirm.setText("Attribute discovery might take "
                            "a long time on large tables.\n"
                            "Do you want to auto discover attributes?")
            confirm.addButton("Yes", QMessageBox.YesRole)
            no_button = confirm.addButton("No", QMessageBox.NoRole)
            sample_button = confirm.addButton("Yes, on a sample",
                                              QMessageBox.YesRole)
            confirm.exec()
            if confirm.clickedButton() == no_button:
                self.guess_values = False
            elif confirm.clickedButton() == sample_button:
                sample = True

        self.information(1)
        if self.guess_values:
            QApplication.setOverrideCursor(QCursor(Qt.WaitCursor))
            if sample:
                s = table.sample_time(1)
                domain = s.get_domain(guess_values=True)
                self.information(
                    1, "Domain was generated from a sample of the table.")
            else:
                domain = table.get_domain(guess_values=True)
            QApplication.restoreOverrideCursor()
            table.domain = domain

        self.send("Data", table)
Пример #23
0
 def test_NaiveBayes(self):
     table = SqlTable(dict(host='localhost', database='test'), 'iris',
                      type_hints=Domain([], DiscreteVariable("iris",
                             values=['Iris-setosa', 'Iris-virginica',
                                     'Iris-versicolor'])))
     table = preprocess.Discretize(table)
     bayes = nb.NaiveBayesLearner()
     clf = bayes(table)
     # Single instance prediction
     self.assertEqual(clf(table[0]), table[0].get_class())
     # Table prediction
     pred = clf(table)
     actual = array([ins.get_class() for ins in table])
     ca = pred == actual
     ca = ca.sum() / len(ca)
     self.assertGreater(ca, 0.95)
     self.assertLess(ca, 1.)
Пример #24
0
    def test_recovers_connection_after_sql_error(self):
        conn, table_name = self.create_sql_table(
            np.arange(25).reshape((-1, 1)))
        sql_table = SqlTable(conn, table_name)

        try:
            broken_query = "SELECT 1/%s FROM %s" % (
                sql_table.domain.attributes[0].to_sql(), sql_table.table_name)
            with sql_table.backend.execute_sql_query(broken_query) as cur:
                cur.fetchall()
        except BackendError:
            pass

        working_query = "SELECT %s FROM %s" % (
            sql_table.domain.attributes[0].to_sql(), sql_table.table_name)
        with sql_table.backend.execute_sql_query(working_query) as cur:
            cur.fetchall()
Пример #25
0
    def test_basic_stats(self):
        iris = SqlTable(self.conn, self.iris, inspect_values=True)
        stats = BasicStats(iris, iris.domain['sepal length'])
        self.assertAlmostEqual(stats.min, 4.3)
        self.assertAlmostEqual(stats.max, 7.9)
        self.assertAlmostEqual(stats.mean, 5.8, 1)
        self.assertEqual(stats.nans, 0)
        self.assertEqual(stats.non_nans, 150)

        domain_stats = DomainBasicStats(iris, include_metas=True)
        self.assertEqual(len(domain_stats.stats),
                         len(iris.domain) + len(iris.domain.metas))
        stats = domain_stats['sepal length']
        self.assertAlmostEqual(stats.min, 4.3)
        self.assertAlmostEqual(stats.max, 7.9)
        self.assertAlmostEqual(stats.mean, 5.8, 1)
        self.assertEqual(stats.nans, 0)
        self.assertEqual(stats.non_nans, 150)
Пример #26
0
    def test_joins(self):
        table = SqlTable(self.conn,
                         """SELECT a."sepal length",
                          b. "petal length",
                          CASE WHEN b."petal length" < 3 THEN '<'
                               ELSE '>'
                           END AS "qualitative petal length"
                     FROM iris a
               INNER JOIN iris b ON a."sepal width" = b."sepal width"
                    WHERE a."petal width" < 1
                 ORDER BY a."sepal length", b. "petal length" ASC""",
                         type_hints=Domain([
                             DiscreteVariable(name="qualitative petal length",
                                              values=['<', '>'])
                         ], []))

        self.assertEqual(len(table), 498)
        self.assertAlmostEqual(list(table[497]), [5.8, 1.2, 0.])
Пример #27
0
    def test_query_subset_of_rows(self):
        table = SqlTable(self.conn, self.iris)
        all_results = list(table._query())

        results = list(table._query(rows=range(10)))
        self.assertEqual(len(results), 10)
        self.assertSequenceEqual(results, all_results[:10])

        results = list(table._query(rows=range(10)))
        self.assertEqual(len(results), 10)
        self.assertSequenceEqual(results, all_results[:10])

        results = list(table._query(rows=slice(None, 10)))
        self.assertEqual(len(results), 10)
        self.assertSequenceEqual(results, all_results[:10])

        results = list(table._query(rows=slice(10, None)))
        self.assertEqual(len(results), 140)
        self.assertSequenceEqual(results, all_results[10:])
Пример #28
0
 def test_XY_large(self):
     from Orange.data.sql.table import AUTO_DL_LIMIT as DLL
     mat = np.random.randint(0, 2, (DLL + 100, 3))
     conn, table_name = self.create_sql_table(mat)
     sql_table = SqlTable(conn, table_name,
                          type_hints=Domain([], DiscreteVariable(
                              name='col2', values=['0', '1', '2'])))
     self.assertRaises(ValueError, lambda: sql_table.X)
     self.assertRaises(ValueError, lambda: sql_table.Y)
     with self.assertRaises(ValueError):
         sql_table.download_data(DLL + 10)
     # Download partial data
     sql_table.download_data(DLL + 10, partial=True)
     assert_almost_equal(sql_table.X, mat[:DLL + 10, :2])
     assert_almost_equal(sql_table.Y.flatten()[:DLL + 10], mat[:DLL + 10, 2])
     # Download all data
     sql_table.download_data()
     assert_almost_equal(sql_table.X, mat[:, :2])
     assert_almost_equal(sql_table.Y.flatten(), mat[:, 2])
Пример #29
0
 def test_NaiveBayes(self):
     iris_v = ['Iris-setosa', 'Iris-virginica', 'Iris-versicolor']
     table = SqlTable(self.conn,
                      self.iris,
                      type_hints=Domain([],
                                        DiscreteVariable("iris",
                                                         values=iris_v)))
     disc = preprocess.Discretize()
     table = disc(table)
     bayes = nb.NaiveBayesLearner()
     clf = bayes(table)
     # Single instance prediction
     self.assertEqual(clf(table[0]), table[0].get_class())
     # Table prediction
     pred = clf(table)
     actual = array([ins.get_class() for ins in table])
     ca = pred == actual
     ca = ca.sum() / len(ca)
     self.assertGreater(ca, 0.95)
     self.assertLess(ca, 1.)
Пример #30
0
    def test_universal_table(self):
        _, table_name = self.construct_universal_table()

        SqlTable(self.conn, """
            SELECT
                v1.col2 as v1,
                v2.col2 as v2,
                v3.col2 as v3,
                v4.col2 as v4,
                v5.col2 as v5
              FROM %(table_name)s v1
        INNER JOIN %(table_name)s v2 ON v2.col0 = v1.col0 AND v2.col1 = 2
        INNER JOIN %(table_name)s v3 ON v3.col0 = v2.col0 AND v3.col1 = 3
        INNER JOIN %(table_name)s v4 ON v4.col0 = v1.col0 AND v4.col1 = 4
        INNER JOIN %(table_name)s v5 ON v5.col0 = v1.col0 AND v5.col1 = 5
             WHERE v1.col1 = 1
          ORDER BY v1.col0
        """ % dict(table_name='"%s"' % table_name))

        self.drop_sql_table(table_name)