Example No. 1
class DefaultTemplateMetaDataCompatibilityTest(NodeProvider,
                                               unittest.TestCase):
    CLUSTER_ID = gen_id()

    CLUSTER_SETTINGS = {
        'cluster.name': CLUSTER_ID,
    }

    SUPPORTED_VERSIONS = (VersionDef('3.0.x', False, []),
                          VersionDef('latest-nightly', False, []))

    def test_metadata_compatibility(self):
        nodes = 3

        cluster = self._new_cluster(self.SUPPORTED_VERSIONS[0].version, nodes,
                                    self.CLUSTER_SETTINGS)
        cluster.start()
        with connect(cluster.node().http_url, error_trace=True) as conn:
            cursor = conn.cursor()
            cursor.execute("select 1")
        self._process_on_stop()

        for version_def in self.SUPPORTED_VERSIONS[1:]:
            self.assert_dynamic_string_detection(version_def, nodes)

    def assert_dynamic_string_detection(self, version_def, nodes):
        """ Test that a dynamic string column detection works as expected.

        If the cluster was initially created/started with a lower CrateDB
        version, we must ensure that our default template is also upgraded, if
        needed, because it is persisted in the cluster state. That's why
        re-creating tables would not help.
        """
        self._move_nodes_folder_if_needed()
        cluster = self._new_cluster(version_def.version, nodes,
                                    self.CLUSTER_SETTINGS,
                                    prepare_env(version_def.java_home))
        cluster.start()
        with connect(cluster.node().http_url, error_trace=True) as conn:
            cursor = conn.cursor()
            cursor.execute('CREATE TABLE t1 (o object)')
            cursor.execute('''INSERT INTO t1 (o) VALUES ({"name" = 'foo'})''')
            self.assertEqual(cursor.rowcount, 1)
            cursor.execute('REFRESH TABLE t1')
            cursor.execute("SELECT o['name'], count(*) FROM t1 GROUP BY 1")
            rs = cursor.fetchall()
            self.assertEqual(['foo', 1], rs[0])
            cursor.execute('DROP TABLE t1')
            self._process_on_stop()

    def _move_nodes_folder_if_needed(self):
        """Eliminates the cluster-id folder inside the data directory."""
        data_path_incl_cluster_id = os.path.join(self._path_data,
                                                 self.CLUSTER_ID)
        if os.path.exists(data_path_incl_cluster_id):
            src_path_nodes = os.path.join(data_path_incl_cluster_id, 'nodes')
            target_path_nodes = os.path.join(self._path_data, 'nodes')
            shutil.move(src_path_nodes, target_path_nodes)
            shutil.rmtree(data_path_incl_cluster_id)
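
These examples rely on a few helpers that are not shown on this page: VersionDef, gen_id and prepare_env. A minimal sketch of the shapes they would need to have for the snippets to work as written (an assumption, not the original definitions):

import os
import uuid
from typing import List, NamedTuple


class VersionDef(NamedTuple):
    """Assumed shape: (version label, whether segments need upgrading, JAVA_HOME candidates)."""
    version: str
    upgrade_segments: bool
    java_home: List[str]


def gen_id() -> str:
    """Random identifier used as a unique cluster name."""
    return str(uuid.uuid4())


def prepare_env(java_home: List[str]) -> dict:
    """Copy the current environment and, if a JDK path is given, point JAVA_HOME at it."""
    env = dict(os.environ)
    if java_home:
        env['JAVA_HOME'] = java_home[0]
    return env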
Example No. 2
class TableSettingsCompatibilityTest(NodeProvider, unittest.TestCase):

    CLUSTER_SETTINGS = {
        'cluster.name': gen_id(),
    }

    SUPPORTED_VERSIONS = (VersionDef('2.3.x', False, []),
                          VersionDef('3.2.x', False, []))

    def test_altering_tables_with_old_settings(self):
        """ Test that the settings of tables created with an old not anymore
        supported setting can still be changed when running with the latest
        version. This test ensures that old settings are removed on upgrade or
        at latest when changing some table settings. Before 3.1.2, purging old
        settings was not done correctly and thus altering settings of such
        tables failed.
        """

        nodes = 3

        cluster = self._new_cluster(self.SUPPORTED_VERSIONS[0].version, nodes,
                                    self.CLUSTER_SETTINGS)
        cluster.start()
        with connect(cluster.node().http_url, error_trace=True) as conn:
            cursor = conn.cursor()

            # The "recovery.initial_shards" setting is only supported up to version 2.3.x
            cursor.execute('''
                CREATE TABLE t1 (id int) clustered into 4 shards with ("recovery.initial_shards"=1, number_of_replicas=0);
            ''')
            cursor.execute('''
                CREATE TABLE p1 (id int, p int) clustered into 4 shards partitioned by (p) with ("recovery.initial_shards"=1, number_of_replicas=0);
            ''')
            cursor.execute('''
                INSERT INTO p1 (id, p) VALUES (1, 1);
            ''')
        self._process_on_stop()

        for version_def in self.SUPPORTED_VERSIONS[1:]:
            self.start_cluster_and_alter_tables(version_def, nodes)

    def start_cluster_and_alter_tables(self, version_def, nodes):
        cluster = self._new_cluster(version_def.version, nodes,
                                    self.CLUSTER_SETTINGS,
                                    prepare_env(version_def.java_home))
        cluster.start()
        with connect(cluster.node().http_url, error_trace=True) as conn:
            cursor = conn.cursor()
            # t1 and the single p1 partition have 4 primary shards each -> 8 active shards
            wait_for_active_shards(cursor, 8)
            cursor.execute('''
                ALTER TABLE t1 SET (number_of_replicas=1)
            ''')
            cursor.execute('''
                ALTER TABLE p1 SET (number_of_replicas=1)
            ''')
        self._process_on_stop()
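
The helper wait_for_active_shards used above is not part of this excerpt. A plausible sketch, assuming it polls sys.shards until the expected number of shards are in the STARTED state:

import time


def wait_for_active_shards(cursor, expected_shards=0, timeout=60):
    """Poll sys.shards until the cluster looks settled (assumed behaviour).

    With an explicit count, wait for at least that many STARTED shards;
    without one, wait until no shard is left in a non-STARTED state.
    """
    deadline = time.time() + timeout
    while time.time() < deadline:
        cursor.execute("SELECT state, count(*) FROM sys.shards GROUP BY state")
        counts = dict(cursor.fetchall())
        started = counts.get('STARTED', 0)
        pending = sum(n for state, n in counts.items() if state != 'STARTED')
        if expected_shards > 0 and started >= expected_shards:
            return
        if expected_shards == 0 and pending == 0:
            return
        time.sleep(0.5)
    raise TimeoutError('shards did not become active in time')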
Example No. 3
    def test_snapshot_compatibility(self):
        """Test snapshot compatibility when upgrading 3.3.x -> 4.x.x

        Using Minio as a S3 repository, the first cluster that runs
        creates the repo, a table and inserts/selects some data, which
        then is snapshotted and deleted. The next cluster recovers the
        data from the last snapshot, performs further inserts/selects,
        to then snapshot the data and delete it.

        We are interested in the transition 3.3.x -> 4.x.x
        """
        with MinioServer() as minio:
            t = threading.Thread(target=minio.run)
            t.daemon = True
            t.start()
            wait_until(lambda: _is_up('127.0.0.1', 9000))

            num_nodes = 3
            num_docs = 30
            prev_version = None
            num_snapshot = 1
            path_data = 'data_test_snapshot_compatibility'
            cluster_settings = {
                'cluster.name': gen_id(),
                'path.data': path_data
            }
            shutil.rmtree(path_data, ignore_errors=True)
            for version in self.VERSION:
                cluster = self._new_cluster(version,
                                            num_nodes,
                                            settings=cluster_settings)
                cluster.start()
                with connect(cluster.node().http_url,
                             error_trace=True) as conn:
                    c = conn.cursor()
                    if not prev_version:
                        c.execute(self.CREATE_REPOSITORY)
                        c.execute(CREATE_ANALYZER)
                        c.execute(CREATE_DOC_TABLE)
                        insert_data(conn, 'doc', 't1', num_docs)
                    else:
                        c.execute(
                            self.RESTORE_SNAPSHOT_TPT.format(num_snapshot - 1))
                    c.execute('SELECT COUNT(*) FROM t1')
                    rowcount = c.fetchone()[0]
                    self.assertEqual(rowcount, num_docs)
                    run_selects(c, version)
                    c.execute(self.CREATE_SNAPSHOT_TPT.format(num_snapshot))
                    c.execute(self.DROP_DOC_TABLE)
                self._process_on_stop()
                prev_version = version
                num_snapshot += 1
            shutil.rmtree(path_data, ignore_errors=True)
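
The class attributes CREATE_REPOSITORY, CREATE_SNAPSHOT_TPT, RESTORE_SNAPSHOT_TPT and DROP_DOC_TABLE referenced via self above are not included in this excerpt. Judging from the Minio repository and snapshot statements that appear verbatim in Example No. 5, they could look roughly like this (an assumption, not the original definitions):

CREATE_REPOSITORY = '''
    CREATE REPOSITORY repo TYPE S3
    WITH (access_key = 'minio',
          secret_key = 'miniostorage',
          bucket = 'backups',
          endpoint = '127.0.0.1:9000',
          protocol = 'http')
'''

# The placeholder is filled with the running snapshot number.
CREATE_SNAPSHOT_TPT = "CREATE SNAPSHOT repo.snapshot{} ALL WITH (wait_for_completion = true)"
RESTORE_SNAPSHOT_TPT = "RESTORE SNAPSHOT repo.snapshot{} ALL WITH (wait_for_completion = true)"
DROP_DOC_TABLE = 'DROP TABLE t1'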
Example No. 4
class SqlLogicTest(NodeProvider, unittest.TestCase):
    CLUSTER_SETTINGS = {
        'cluster.name': gen_id(),
    }

    def test_sqllogic(self):
        """ Runs sqllogictests against latest CrateDB. """
        (node, _) = self._new_node(self.CRATE_VERSION)
        node.start()
        psql_addr = node.addresses.psql
        logfiles = []
        try:
            with ProcessPoolExecutor() as executor:
                futures = []
                for i, filename in enumerate(tests_path.glob('**/*.test')):
                    filepath = tests_path / filename
                    relpath = str(filepath.relative_to(tests_path))
                    if not any(p.match(str(relpath)) for p in FILE_WHITELIST):
                        continue

                    logfile = os.path.join(
                        here, f'sqllogic-{os.path.basename(relpath)}-{i}.log')
                    logfiles.append(logfile)
                    future = executor.submit(run_file,
                                             filename=str(filepath),
                                             host='localhost',
                                             port=str(psql_addr.port),
                                             log_level=logging.WARNING,
                                             log_file=logfile,
                                             failfast=True,
                                             schema=f'x{i}')
                    futures.append(future)
                for future in as_completed(futures):
                    future.result()
        finally:
            # Merge the dozens of per-test log files into a single file that is covered by .gitignore
            merge_logfiles(logfiles)
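
merge_logfiles is defined outside this excerpt. A minimal sketch, assuming it concatenates the per-test logs into a single file and removes the individual files afterwards:

def merge_logfiles(logfiles):
    """Concatenate the per-test log files into one file and remove the originals
    (assumed behaviour; the real helper may differ)."""
    with open(os.path.join(here, 'sqllogic.log'), 'w') as target:
        for logfile in logfiles:
            if not os.path.exists(logfile):
                continue
            with open(logfile) as source:
                content = source.read()
            if content:
                target.write(f'=== {os.path.basename(logfile)} ===\n')
                target.write(content)
            os.remove(logfile)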
Example No. 5
    def test_snapshot_restore_and_drop_in_parallel(self):
        """Test to run the drop and restore operation on two different
           snapshots in parallel.

        The purpose of this test is to validate that the snapshot mechanism
        of CrateDB can handle the two operations in parallel. Here, Minio is
        used as s3 backend for the repository, but this should work on any
        other backend as well.
        """
        with MinioServer() as minio:
            t = threading.Thread(target=minio.run)
            t.daemon = True
            t.start()
            wait_until(lambda: _is_up('127.0.0.1', 9000))

            num_nodes = random.randint(3, 5)
            number_of_shards = random.randint(1, 3)
            number_of_replicas = random.randint(0, 2)
            num_docs = random.randint(1, 100)

            cluster_settings = {
                'cluster.name': gen_id(),
                'path.data': self.PATH_DATA
            }
            shutil.rmtree(self.PATH_DATA, ignore_errors=True)
            cluster = self._new_cluster('latest-nightly',
                                        num_nodes,
                                        settings=cluster_settings)
            cluster.start()

            with connect(cluster.node().http_url, error_trace=True) as conn:
                c = conn.cursor()
                wait_for_active_shards(c)
                c.execute('''
                    CREATE TABLE doc.test (x int)
                    CLUSTERED INTO ? SHARDS
                    WITH (number_of_replicas = ?)
                ''', (number_of_shards, number_of_replicas))

                insert_data(conn, 'doc', 'test', num_docs)

                c.execute('''
                            CREATE REPOSITORY repo TYPE S3
                            WITH (access_key = 'minio',
                            secret_key = 'miniostorage',
                            bucket='backups',
                            endpoint = '127.0.0.1:9000',
                            protocol = 'http')
                        ''')

                c.execute(
                    'CREATE SNAPSHOT repo.snapshot1 TABLE doc.test WITH (wait_for_completion = true)'
                )
                c.execute(
                    'CREATE SNAPSHOT repo.snapshot2 TABLE doc.test WITH (wait_for_completion = true)'
                )
                c.execute('DROP TABLE doc.test')
                # Drop snapshot2 while the restore of snapshot1 is still running
                c.execute(
                    'RESTORE SNAPSHOT repo.snapshot1 ALL WITH (wait_for_completion = false)'
                )
                try:
                    c.execute('DROP SNAPSHOT repo.snapshot2')
                except ProgrammingError:
                    self.fail(
                        "Restore and Drop Snapshot operation should work in parallel"
                    )

                assert_busy(lambda: self._assert_num_docs(conn, num_docs))

            cluster.stop()
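
assert_busy and self._assert_num_docs are not part of this excerpt either. A sketch of what they plausibly do, mirroring the COUNT(*) check from Example No. 3 (assumed, not the original implementations):

import time


def assert_busy(assertion, timeout=30, interval=0.5):
    """Retry `assertion` until it stops raising AssertionError or the timeout expires."""
    deadline = time.time() + timeout
    while True:
        try:
            assertion()
            return
        except AssertionError:
            if time.time() > deadline:
                raise
            time.sleep(interval)


def _assert_num_docs(self, conn, expected):
    """Verify that the restored doc.test table contains the expected number of rows."""
    c = conn.cursor()
    c.execute('SELECT COUNT(*) FROM doc.test')
    self.assertEqual(c.fetchone()[0], expected)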
Example No. 6
class MetaDataCompatibilityTest(NodeProvider, unittest.TestCase):

    CLUSTER_SETTINGS = {
        'license.enterprise': 'true',
        'lang.js.enabled': 'true',
        'cluster.name': gen_id(),
    }

    SUPPORTED_VERSIONS = (VersionDef('2.3.x', False, []),
                          VersionDef('3.3.x', False, []),
                          VersionDef('latest-nightly', False, []))

    def test_metadata_compatibility(self):
        nodes = 3

        cluster = self._new_cluster(self.SUPPORTED_VERSIONS[0].version, nodes,
                                    self.CLUSTER_SETTINGS)
        cluster.start()
        with connect(cluster.node().http_url, error_trace=True) as conn:
            cursor = conn.cursor()
            cursor.execute('''
                CREATE USER user_a;
            ''')
            cursor.execute('''
                GRANT ALL PRIVILEGES ON SCHEMA doc TO user_a;
            ''')
            cursor.execute('''
                CREATE FUNCTION fact(LONG)
                RETURNS LONG
                LANGUAGE JAVASCRIPT
                AS 'function fact(a) { return a < 2 ? 0 : a * (a - 1); }';
            ''')
        self._process_on_stop()

        for version_def in self.SUPPORTED_VERSIONS[1:]:
            self.assert_meta_data(version_def, nodes)

        # restart with latest version
        self.assert_meta_data(self.SUPPORTED_VERSIONS[-1], nodes)

    def assert_meta_data(self, version_def, nodes):
        cluster = self._new_cluster(version_def.version, nodes,
                                    self.CLUSTER_SETTINGS,
                                    prepare_env(version_def.java_home))
        cluster.start()
        with connect(cluster.node().http_url, error_trace=True) as conn:
            cursor = conn.cursor()
            cursor.execute('''
                SELECT name, superuser
                FROM sys.users
                ORDER BY superuser, name;
            ''')
            rs = cursor.fetchall()
            self.assertEqual(['user_a', False], rs[0])
            self.assertEqual(['crate', True], rs[1])
            cursor.execute('''
                SELECT fact(100);
            ''')
            self.assertEqual(9900, cursor.fetchone()[0])
            cursor.execute('''
                SELECT class, grantee, ident, state, type
                FROM sys.privileges
                ORDER BY class, grantee, ident, state, type
            ''')
            self.assertEqual([['SCHEMA', 'user_a', 'doc', 'GRANT', 'DDL'],
                              ['SCHEMA', 'user_a', 'doc', 'GRANT', 'DML'],
                              ['SCHEMA', 'user_a', 'doc', 'GRANT', 'DQL']],
                             cursor.fetchall())

            self._process_on_stop()
Example No. 7
class StorageCompatibilityTest(NodeProvider, unittest.TestCase):

    CLUSTER_SETTINGS = {
        'cluster.name': gen_id(),
    }

    def test_upgrade_paths(self):
        for path in get_test_paths():
            try:
                self.setUp()
                self._test_upgrade_path(path, nodes=3)
            finally:
                self.tearDown()

    def _upgrade(self, cursor, upgrade_segments, num_retries=3):
        """
        Performs the upgrade of the indices and retries in case of
        ProgrammingErrors.

        The retry was added because the wait_for_active_shards check
        collects the shard information directly from the nodes. The
        internal ES code, however, retrieves the shard information
        from the ClusterState. A retry is necessary in case the shards
        are ready but the cluster state hasn't been updated yet.
        """
        try:
            if upgrade_segments:
                cursor.execute(
                    'OPTIMIZE TABLE doc.t1 WITH (upgrade_segments = true)')
                cursor.execute(
                    'OPTIMIZE TABLE blob.b1 WITH (upgrade_segments = true)')
        except ProgrammingError as e:
            print(f'OPTIMIZE failed: {e.message} (num_retries={num_retries})')
            if num_retries > 0 and "PrimaryMissingActionException" in e.message:
                time.sleep(1 / (num_retries + 1))
                self._upgrade(cursor, upgrade_segments, num_retries - 1)
            else:
                raise e

    def _test_upgrade_path(self, versions: Tuple[VersionDef, ...], nodes):
        """ Test the upgrade path across the specified versions.

        Creates a blob table and a regular table in the first version and
        inserts some records, then goes through all subsequent versions,
        each time verifying that a few simple selects still work.
        """
        version_def = versions[0]
        env = prepare_env(version_def.java_home)
        cluster = self._new_cluster(version_def.version, nodes,
                                    self.CLUSTER_SETTINGS, env)
        cluster.start()
        digest = None
        with connect(cluster.node().http_url, error_trace=True) as conn:
            c = conn.cursor()
            c.execute(CREATE_ANALYZER)
            c.execute(CREATE_DOC_TABLE)
            c.execute(CREATE_PARTED_TABLE)
            c.execute('''
                INSERT INTO t1 (id, text) VALUES (0, 'Phase queue is foo!')
            ''')
            insert_data(conn, 'doc', 't1', 10)
            c.execute(CREATE_BLOB_TABLE)
            run_selects(c, versions[0].version)
            container = conn.get_blob_container('b1')
            digest = container.put(BytesIO(b'sample data'))
            container.get(digest)
        self._process_on_stop()

        for version_def in versions[1:]:
            self.assert_data_persistence(version_def, nodes, digest)

        # restart with latest version
        version_def = versions[-1]
        self.assert_data_persistence(version_def, nodes, digest)

    def assert_data_persistence(self, version_def, nodes, digest):
        env = prepare_env(version_def.java_home)
        version = version_def.version
        cluster = self._new_cluster(version, nodes, self.CLUSTER_SETTINGS, env)
        cluster.start()
        with connect(cluster.node().http_url, error_trace=True) as conn:
            cursor = conn.cursor()
            wait_for_active_shards(cursor, 0)
            self._upgrade(cursor, version_def.upgrade_segments)
            cursor.execute(
                'ALTER TABLE doc.t1 SET ("refresh_interval" = 4000)')
            run_selects(cursor, version_def.version)
            container = conn.get_blob_container('b1')
            container.get(digest)
            cursor.execute(
                'ALTER TABLE doc.t1 SET ("refresh_interval" = 2000)')

            # older versions had a bug that caused this to fail
            if version in ('latest-nightly', '3.2'):
                # Test that partition and dynamic columns can be created
                obj = {"t_" + version.replace('.', '_'): True}
                args = (str(uuid4()), version, obj)
                cursor.execute(
                    'INSERT INTO doc.parted (id, version, cols) values (?, ?, ?)',
                    args)
        self._process_on_stop()
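
get_test_paths() is defined elsewhere. Based on how test_upgrade_paths consumes it and on the SUPPORTED_VERSIONS tuples in the other examples, it presumably yields tuples of VersionDef values, oldest version first, one tuple per upgrade path. A minimal sketch under that assumption (the concrete versions are illustrative only):

def get_test_paths():
    """Yield upgrade paths as tuples of VersionDef, oldest version first
    (assumed shape; the real implementation likely builds these from a
    table of supported versions)."""
    yield (VersionDef('3.3.x', False, []),
           VersionDef('latest-nightly', False, []))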