def test_partitioned_inserts(self, unique_database):
  """Check that the different ACID write operations take appropriate locks.
  INSERT INTO: should take a shared lock
  INSERT OVERWRITE: should take an exclusive lock
  Both should take a PARTITION-level lock in the case of a static partition insert."""
  tbl_name = "%s.test_concurrent_partitioned_inserts" % unique_database
  self.client.set_configuration_option("SYNC_DDL", "true")
  self.client.execute("""
      CREATE TABLE {0} (i int) PARTITIONED BY (p INT, q INT) TBLPROPERTIES(
      'transactional_properties'='insert_only','transactional'='true')
      """.format(tbl_name))
  # Warmup DDL: load the table metadata before the timed tasks below.
  self.execute_query(
      "alter table {0} add partition(p=0,q=0)".format(tbl_name))

  sleep_sec = 5
  task_insert_into = Task(self._impala_role_partition_writer,
                          tbl_name, "p=1,q=1", False, sleep_sec)
  # INSERT INTOs to the same partition can run in parallel.
  duration = run_tasks([task_insert_into, task_insert_into])
  assert duration < 3 * sleep_sec

  task_insert_overwrite = Task(self._impala_role_partition_writer,
                               tbl_name, "p=1,q=1", True, sleep_sec)
  # INSERT INTO + INSERT OVERWRITE should have mutual exclusion.
  duration = run_tasks([task_insert_into, task_insert_overwrite])
  assert duration > 4 * sleep_sec

  # INSERT OVERWRITEs to the same partition should have mutual exclusion.
  duration = run_tasks([task_insert_overwrite, task_insert_overwrite])
  assert duration > 4 * sleep_sec

  task_insert_overwrite_2 = Task(self._impala_role_partition_writer,
                                 tbl_name, "p=1,q=2", True, sleep_sec)
  # INSERT OVERWRITEs to different partitions can run in parallel.
  duration = run_tasks([task_insert_overwrite, task_insert_overwrite_2])
  assert duration < 3 * sleep_sec
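# _impala_role_partition_writer is not shown in this section. Below is a minimal,
# hypothetical sketch of what such a writer role could look like: it runs a single
# INSERT INTO/OVERWRITE against the given static partition and keeps the statement
# (and therefore its lock) alive for roughly sleep_sec seconds via Impala's sleep()
# builtin. The real helper's body may differ; ImpalaTestSuite.create_impala_client()
# is assumed to be importable in this test module.
def _impala_role_partition_writer(self, tbl_name, partition, is_overwrite, sleep_sec):
  impalad_client = ImpalaTestSuite.create_impala_client()
  try:
    mode = "overwrite" if is_overwrite else "into"
    # sleep() takes milliseconds; if() maps its BOOLEAN result to the INT column 'i'.
    impalad_client.execute("insert {0} table {1} partition({2}) "
                           "select if(sleep({3}), 1, 1)".format(
                               mode, tbl_name, partition, sleep_sec * 1000))
  finally:
    impalad_client.close()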
def test_iceberg_inserts(self, unique_database):
  """Issues INSERT statements against multiple impalads in a way that some
  invariants must be true when a spectator process inspects the table, e.g.
  that the table always contains continuous ranges of integers."""
  tbl_name = "%s.test_concurrent_inserts" % unique_database
  self.client.set_configuration_option("SYNC_DDL", "true")
  self.client.execute(
      """create table {0} (wid int, i int) stored as iceberg
      tblproperties('iceberg.catalog'='hadoop.catalog',
      'iceberg.catalog_location'='{1}')""".format(
          tbl_name, '/test-warehouse/' + unique_database))

  counter = Value('i', 0)
  num_writers = 4
  num_checkers = 2
  inserts = 30

  writers = [Task(self._impala_role_concurrent_writer, tbl_name, i, inserts, counter)
             for i in xrange(0, num_writers)]
  checkers = [Task(self._impala_role_concurrent_checker, tbl_name, i, counter,
                   num_writers)
              for i in xrange(0, num_checkers)]
  run_tasks(writers + checkers)
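# _impala_role_concurrent_writer is not defined in this section. A minimal sketch is
# given below, assuming each writer appends the ascending sequence 0..num_inserts-1
# tagged with its writer id 'wid' and bumps the shared counter when it is done, so
# the checkers know when to stop. The real helper may spread its statements across
# different impalads.
def _impala_role_concurrent_writer(self, tbl_name, wid, num_inserts, counter):
  impalad_client = ImpalaTestSuite.create_impala_client()
  try:
    for i in xrange(0, num_inserts):
      impalad_client.execute(
          "insert into table %s values (%d, %d)" % (tbl_name, wid, i))
  finally:
    with counter.get_lock():
      counter.value += 1
    impalad_client.close()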
def _run_test_read_impala_inserts(self, unique_database, partitioned):
  """Check that Impala can read a single insert-only ACID table that is
  (over)written by Impala several times. Consistency can be checked by using
  incremental values for overwrites ('run') and inserts ('i').
  """
  tbl_name = "%s.test_read_impala_inserts" % unique_database
  self._create_table(tbl_name, partitioned)

  run_tasks([
      Task(self._impala_role_write_inserts, tbl_name, partitioned),
      Task(self._impala_role_read_inserts, tbl_name, needs_refresh=False,
           sleep_seconds=0.1)])
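# Neither _impala_role_write_inserts nor _impala_role_read_inserts is shown here. The
# sketch below illustrates the reader side only, under these assumptions: the writer
# starts each 'run' with an INSERT OVERWRITE and then appends rows with increasing
# 'i', so a consistent snapshot never mixes two 'run' values and never has gaps in
# 'i'. The loop bound, the polling sleep and the use of the time module are
# assumptions; the real helper may differ.
def _impala_role_read_inserts(self, tbl_name, needs_refresh, sleep_seconds):
  impalad_client = ImpalaTestSuite.create_impala_client()
  try:
    for _ in xrange(0, 100):  # assumed bound; the real test may stop differently
      if needs_refresh:
        impalad_client.execute("refresh %s" % tbl_name)
      result = impalad_client.execute(
          "select run, i from %s order by run, i" % tbl_name)
      rows = [line.split('\t') for line in result.data]
      if rows:
        runs = set(int(r[0]) for r in rows)
        assert len(runs) == 1, "partial overwrite is visible: %s" % runs
        values = [int(r[1]) for r in rows]
        assert values == list(range(0, len(values))), "gap in values: %s" % values
      time.sleep(sleep_seconds)
  finally:
    impalad_client.close()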
def test_inserts(self, unique_database):
  """Issues INSERT statements against multiple impalads in a way that some
  invariants must be true when a spectator process inspects the table, e.g.
  that the table always contains continuous ranges of integers."""
  tbl_name = "%s.test_concurrent_inserts" % unique_database
  self.client.set_configuration_option("SYNC_DDL", "true")
  self.client.execute("drop table if exists %s" % tbl_name)
  self.client.execute("""create table {0} (wid int, i int)""".format(tbl_name))

  counter = Value('i', 0)
  num_writers = 16
  num_checkers = 4

  writers = [Task(self._impala_role_concurrent_writer, tbl_name, i, counter)
             for i in xrange(0, num_writers)]
  checkers = [Task(self._impala_role_concurrent_checker, tbl_name, i, counter,
                   num_writers)
              for i in xrange(0, num_checkers)]
  run_tasks(writers + checkers)
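# _impala_role_concurrent_checker (used here and in test_iceberg_inserts above) is
# the spectator mentioned in the docstring. A hypothetical sketch: it keeps scanning
# the table until every writer has bumped the shared counter and asserts that, for
# each writer id, the visible values form a gap-free integer range. The verification
# details are assumptions and may differ from the real helper; 'cid' could be used to
# pick a specific impalad and is unused in this sketch.
def _impala_role_concurrent_checker(self, tbl_name, cid, counter, num_writers):
  def verify_result_set(result):
    runs = {}
    for line in result.data:
      wid, i = line.split('\t')
      runs.setdefault(int(wid), []).append(int(i))
    for wid, values in runs.items():
      values.sort()
      assert values == list(range(values[0], values[-1] + 1)), \
          "gap in the values written by writer %d: %s" % (wid, values)

  impalad_client = ImpalaTestSuite.create_impala_client()
  try:
    while counter.value != num_writers:
      verify_result_set(impalad_client.execute("select wid, i from %s" % tbl_name))
    # One final check after all writers have finished.
    verify_result_set(impalad_client.execute("select wid, i from %s" % tbl_name))
  finally:
    impalad_client.close()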
def _run_test_failing_inserts(self, unique_database, partitioned):
  """Tests that failing INSERTs cannot be observed."""
  tbl_name = "%s.test_inserts_fail" % unique_database
  self.client.set_configuration_option("SYNC_DDL", "true")
  self.client.execute("drop table if exists %s" % tbl_name)
  part_expr = "partitioned by (p int)" if partitioned else ""
  self.client.execute("""create table %s (i int) %s TBLPROPERTIES (
      'transactional_properties' = 'insert_only', 'transactional' = 'true')
      """ % (tbl_name, part_expr))

  counter = Value('i', 0)
  num_writers = 3
  num_checkers = 3

  writers = [Task(self._impala_role_insert, tbl_name, partitioned, i, counter)
             for i in xrange(0, num_writers)]
  checkers = [Task(self._impala_role_checker, tbl_name, i, counter, num_writers)
              for i in xrange(0, num_checkers)]
  run_tasks(writers + checkers)
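# _impala_role_insert and _impala_role_checker are not part of this section. A sketch
# of the checker role is given below, under the assumption that every INSERT issued
# by the writer role is constructed to fail (e.g. through a DEBUG_ACTION query
# option), so no row may ever become visible. The polling interval and the use of the
# time module are assumptions; the real helpers may differ.
def _impala_role_checker(self, tbl_name, cid, counter, num_writers):
  impalad_client = ImpalaTestSuite.create_impala_client()
  try:
    while counter.value != num_writers:
      result = impalad_client.execute("select count(*) from %s" % tbl_name)
      assert result.data[0] == '0', "rows from a failed INSERT are visible"
      time.sleep(0.2)
  finally:
    impalad_client.close()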